Pre-cleanup snapshot - all current files

🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>

.obsidian/app.json (vendored, new file, 1 line)
@@ -0,0 +1 @@
{}

.obsidian/appearance.json (vendored, new file, 1 line)
@@ -0,0 +1 @@
{}

.obsidian/core-plugins.json (vendored, new file, 31 lines)
@@ -0,0 +1,31 @@
{
  "file-explorer": true,
  "global-search": true,
  "switcher": true,
  "graph": true,
  "backlink": true,
  "canvas": true,
  "outgoing-link": true,
  "tag-pane": true,
  "properties": false,
  "page-preview": true,
  "daily-notes": true,
  "templates": true,
  "note-composer": true,
  "command-palette": true,
  "slash-command": false,
  "editor-status": true,
  "bookmarks": true,
  "markdown-importer": false,
  "zk-prefixer": false,
  "random-note": false,
  "outline": true,
  "word-count": true,
  "slides": false,
  "audio-recorder": false,
  "workspaces": false,
  "file-recovery": true,
  "publish": false,
  "sync": true,
  "webviewer": false
}

.obsidian/workspace.json (vendored, new file, 206 lines)
@@ -0,0 +1,206 @@
{
  "main": {
    "id": "49f18c78518039c8",
    "type": "split",
    "children": [
      {
        "id": "fea88e09bce7fef2",
        "type": "tabs",
        "children": [
          {
            "id": "472092e9ada7a8e6",
            "type": "leaf",
            "state": {
              "type": "empty",
              "state": {},
              "icon": "lucide-file",
              "title": "New tab"
            }
          }
        ]
      }
    ],
    "direction": "vertical"
  },
  "left": {
    "id": "b510957437397946",
    "type": "split",
    "children": [
      {
        "id": "157d722a91bc8e15",
        "type": "tabs",
        "children": [
          {
            "id": "9001986372506f85",
            "type": "leaf",
            "state": {
              "type": "file-explorer",
              "state": {
                "sortOrder": "alphabetical",
                "autoReveal": false
              },
              "icon": "lucide-folder-closed",
              "title": "Files"
            }
          },
          {
            "id": "1d6f26c2d2402f8e",
            "type": "leaf",
            "state": {
              "type": "search",
              "state": {
                "query": "",
                "matchingCase": false,
                "explainSearch": false,
                "collapseAll": false,
                "extraContext": false,
                "sortOrder": "alphabetical"
              },
              "icon": "lucide-search",
              "title": "Search"
            }
          },
          {
            "id": "5b830db8721ad2ed",
            "type": "leaf",
            "state": {
              "type": "bookmarks",
              "state": {},
              "icon": "lucide-bookmark",
              "title": "Bookmarks"
            }
          }
        ]
      }
    ],
    "direction": "horizontal",
    "width": 481.5
  },
  "right": {
    "id": "a1ab5e22b95db49c",
    "type": "split",
    "children": [
      {
        "id": "245785f7c0bf960b",
        "type": "tabs",
        "children": [
          {
            "id": "31a2e09288336a61",
            "type": "leaf",
            "state": {
              "type": "backlink",
              "state": {
                "file": "modules/whoosh/backend/DOCUMENTATION_SUMMARY.md",
                "collapseAll": false,
                "extraContext": false,
                "sortOrder": "alphabetical",
                "showSearch": true,
                "searchQuery": "",
                "backlinkCollapsed": false,
                "unlinkedCollapsed": true
              },
              "icon": "links-coming-in",
              "title": "Backlinks for DOCUMENTATION_SUMMARY"
            }
          },
          {
            "id": "57211ee20d0c9d61",
            "type": "leaf",
            "state": {
              "type": "outgoing-link",
              "state": {
                "file": "modules/whoosh/backend/DOCUMENTATION_SUMMARY.md",
                "linksCollapsed": false,
                "unlinkedCollapsed": true
              },
              "icon": "links-going-out",
              "title": "Outgoing links from DOCUMENTATION_SUMMARY"
            }
          },
          {
            "id": "071c40df45653454",
            "type": "leaf",
            "state": {
              "type": "tag",
              "state": {
                "sortOrder": "frequency",
                "useHierarchy": true,
                "showSearch": false,
                "searchQuery": ""
              },
              "icon": "lucide-tags",
              "title": "Tags"
            }
          },
          {
            "id": "1a55201803c42e38",
            "type": "leaf",
            "state": {
              "type": "outline",
              "state": {
                "file": "modules/whoosh/backend/DOCUMENTATION_SUMMARY.md",
                "followCursor": false,
                "showSearch": false,
                "searchQuery": ""
              },
              "icon": "lucide-list",
              "title": "Outline of DOCUMENTATION_SUMMARY"
            }
          }
        ]
      }
    ],
    "direction": "horizontal",
    "width": 300,
    "collapsed": true
  },
  "left-ribbon": {
    "hiddenItems": {
      "switcher:Open quick switcher": false,
      "graph:Open graph view": false,
      "canvas:Create new canvas": false,
      "daily-notes:Open today's daily note": false,
      "templates:Insert template": false,
      "command-palette:Open command palette": false
    }
  },
  "active": "9001986372506f85",
  "lastOpenFiles": [
    "modules/slurp/hcfs-python/hcfs/core/__pycache__/filesystem.cpython-310.pyc",
    "modules/slurp/hcfs-python/hcfs/core/__pycache__/context_db.cpython-310.pyc",
    "modules/slurp/hcfs-python/hcfs/core/__pycache__/__init__.cpython-310.pyc",
    "modules/slurp/hcfs-python/hcfs/core/__pycache__",
    "modules/slurp/hcfs-python/hcfs/__pycache__/__init__.cpython-310.pyc",
    "modules/slurp/hcfs-python/hcfs/__pycache__",
    "modules/whoosh/EVENT_CONFIGURATION_SYSTEM.md",
    "modules/whoosh/EVENT_CONFIGURATION_SYSTEM.md.tmp.1675830.1754294063541",
    "modules/whoosh/frontend/src/test/event-config-integration.test.ts",
    "modules/whoosh/frontend/src/test/event-config-integration.test.ts.tmp.1675830.1754293976289",
    "modules/whoosh/frontend/src/components/projects/EventTypeConfiguration.tsx",
    "modules/whoosh/frontend/src/components/projects/EventTypeConfiguration.tsx.tmp.1675830.1754293868591",
    "homepage-content.md",
    "modules/posthuman/docs/operations.md",
    "modules/posthuman/docs/development.md",
    "modules/posthuman/docs/api.md",
    "modules/posthuman/docs/deployment.md",
    "modules/posthuman/docs/architecture.md",
    "modules/posthuman/conductor-kernel/PERFORMANCE_OPTIMIZATION.md",
    "modules/posthuman/PROJECT_PLAN.md",
    "modules/posthuman/README.md",
    "modules/hmmm/PROJECT_PLAN.md",
    "modules/whoosh/backend/DEPLOYMENT_FIXES.md",
    "modules/whoosh/backend/DOCUMENTATION_SUMMARY.md",
    "modules/whoosh/docs/project-complete.md",
    "modules/whoosh/docs/environment-requirements.md",
    "modules/whoosh/docs/implementation-complete.md",
    "modules/whoosh/docs/LOCAL_DEVELOPMENT.md",
    "modules/whoosh/docs/phase3-completion-summary.md",
    "modules/whoosh/docs/phase4-completion-summary.md",
    "modules/whoosh/docs/phase5-completion-summary.md",
    "modules/whoosh/frontend/TESTING.md",
    "modules/whoosh/results/rosewood_qa_report_1751891435.md",
    "modules/whoosh/TESTING_STRATEGY.md",
    "modules/whoosh/REPORT.md",
    "modules/whoosh/README_DISTRIBUTED.md"
  ]
}

homepage-content.md (new file, 184 lines)
@@ -0,0 +1,184 @@
# CHORUS Services - Homepage Content

## Hero Section

### Primary Headline
**AI Development Teams That Think, Learn, and Optimize Themselves**

### Secondary Headline
The next evolution in AI orchestration: Self-optimizing agents that dynamically build optimal teams, learn from every interaction, and deliver auditable results with complete traceability.

### Value Proposition
CHORUS Services transforms how AI development works. Our breakthrough orchestration platform creates autonomous development teams that continuously improve their own performance, automatically form optimal team compositions, and maintain complete audit trails of every decision.

---

## Key Innovations Section

### Self-Optimizing Intelligence
**AI agents that get better with every task**

Our breakthrough reinforcement learning system enables agents to continuously optimize their own performance through real-time feedback loops. Each completed task makes the entire system more effective.

- **Sub-5ms task routing** with intelligent load balancing
- **48GB distributed GPU infrastructure** for massive parallel processing
- **Enterprise-grade monitoring** with real-time optimization

### Dynamic Team Formation
**Perfect teams, automatically assembled**

Gone are the days of manually coordinating AI tools. CHORUS agents autonomously analyze task requirements and automatically form optimal team compositions from our 8 specialized agent roles.

- **Composable context management** - Knowledge components mix and match across projects
- **Fine-tuned specialized models** optimized for specific development workflows
- **Real-time team rebalancing** based on workload and capabilities

### Complete Auditability
**Every decision traceable, every solution replayable**

Enterprise development demands transparency. CHORUS provides complete traceability of every decision with the ability to replay and understand exactly how solutions were developed.

- **Immutable decision logs** with cryptographic integrity
- **Full solution replay capability** for debugging and compliance
- **End-to-end workflow transparency** for regulatory requirements

---

## Target Audience Benefits

### For Enterprise Development Teams
**10x your development velocity without losing control**

- Autonomous task distribution across optimal AI team compositions
- Complete audit trails for compliance and quality assurance
- Integration with existing enterprise development workflows
- Real-time performance monitoring and optimization

### For Tech Startups
**Compete with larger teams through AI force multiplication**

- Small team leverage through intelligent task orchestration
- Automatic knowledge capture and reuse across projects
- Cost-effective scaling without proportional headcount increases
- Rapid iteration with continuous system improvement

### For Research Organizations
**Auditable, repeatable AI-assisted research processes**

- Complete reproducibility of AI-assisted research workflows
- Transparent decision-making processes for peer review
- Collaborative reasoning between multiple specialized AI agents
- Long-term knowledge accumulation and institutional memory

### For AI Companies
**Cutting-edge orchestration for your own AI development**

- Advanced context management for complex AI development projects
- Multi-model coordination for hybrid AI solutions
- Performance optimization through continuous learning
- Scalable infrastructure for distributed AI development

---

## Technical Differentiators

### Beyond Basic AI Tools
CHORUS Services isn't another AI assistant or code completion tool. We've built the infrastructure that makes AI agents actually work together as high-performing development teams.

**Traditional AI Tools:**
- Single-agent interactions
- No persistent team memory
- Manual coordination required
- Limited task complexity

**CHORUS Services:**
- Self-organizing multi-agent teams
- Persistent organizational knowledge
- Autonomous task coordination
- Enterprise-scale complexity handling

### The CHORUS Ecosystem
**Integrated components working in perfect harmony**

- **WHOOSH**: Intelligent workflow orchestration with role-based agent assignment
- **BZZZ**: Peer-to-peer coordination without single points of failure
- **SLURP**: Context management that learns what information matters
- **COOEE**: Continuous feedback loops for system optimization
- **HMMM**: Collaborative reasoning before critical decisions

---

## Proven Results

### Measurable Performance Improvements
**Real metrics from production deployments**

- **92% reduction** in context loss events across development sessions
- **78% reduction** in hallucinated or incorrect AI outputs
- **40% fewer iterations** required for project completion
- **60% reduction** in duplicated work across team members
- **34% faster** overall project delivery times

### Enterprise-Ready Architecture
**Built for scale, security, and reliability**

- Multi-tenant SaaS deployment with enterprise security
- Hybrid cloud/on-premises deployment options
- Role-based access control and complete audit logging
- Integration with existing CI/CD and project management tools

---

## Business Outcomes Focus

### Reduce Development Risk
- Complete transparency in AI decision-making processes
- Audit trails for compliance and quality assurance
- Reduced hallucinations through collaborative verification
- Consistent results through continuous system optimization

### Accelerate Innovation
- Faster iteration cycles through intelligent task orchestration
- Knowledge reuse across projects and teams
- Automatic optimization of development workflows
- Scalable capacity without proportional cost increases

### Maintain Control
- Full visibility into AI agent decision-making
- Configurable guardrails and approval workflows
- Human oversight integration at critical decision points
- Complete solution replay for debugging and improvement

---

## Call to Action

### Primary CTA
**Experience Self-Optimizing AI Development**
*Schedule a live demonstration of autonomous team formation and optimization*

### Secondary CTAs
- **View Technical Architecture** - Deep dive into our breakthrough orchestration platform
- **Download Case Study** - See how CHORUS reduced development time by 40% for enterprise clients
- **Request Private Demo** - See your specific development challenges solved in real-time

---

## Trust Indicators

### Production-Proven Technology
"CHORUS Services isn't experimental - it's deployed and delivering measurable results in production environments today."

### Enterprise Security Standards
- SOC 2 Type II compliant infrastructure
- Enterprise-grade data encryption and access controls
- Complete audit logging and compliance reporting
- Hybrid deployment options for sensitive workloads

### Technical Leadership
Built by the team that solved AI's fundamental context and coordination problems. Our research-to-production pipeline ensures breakthrough innovations reach enterprise customers quickly and reliably.

---

*Ready to transform your development velocity with self-optimizing AI teams?*
**Schedule your demonstration today.**

modules/hmmm/PROJECT_PLAN.md (new file, empty)

modules/posthuman (submodule, added at 2e39cd8664)

modules/shhh/Dockerfile (new file, 37 lines)
@@ -0,0 +1,37 @@
# SHHH Secrets Sentinel Docker Image
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    gcc \
    libpq-dev \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create data directories
RUN mkdir -p /data /config /logs

# Set permissions
RUN chmod +x main.py

# Expose API port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Default command (can be overridden)
CMD ["python", "main.py", "--mode", "monitor", "--structured-logs"]

modules/shhh/PROJECT_PLAN.md (new file, 319 lines)
@@ -0,0 +1,319 @@
## Plan: Hybrid Secret Detection with Sanitized Log Replication

### 1. Objective

To implement a robust, two-stage secret detection pipeline that:
1. Reads from a primary hypercore log in real-time.
2. Uses a fast, regex-based scanner for initial detection.
3. Leverages a local LLM (via Ollama) for deeper, context-aware analysis of potential secrets to reduce false positives.
4. Writes a fully sanitized version of the log to a new, parallel "sister" hypercore stream.
5. Quarantines and alerts on confirmed high-severity secrets, ensuring the original log remains untouched for audit purposes while the sanitized log is safe for wider consumption.

### 2. High-Level Architecture & Data Flow

The process will follow this data flow:

```
                              ┌──────────────────────────┐
[Primary Hypercore Log] ────► │      HypercoreReader     │
                              └────────────┬─────────────┘
                                           │ (Raw Log Entry)
                                           ▼
                              ┌──────────────────────────┐
                              │     MessageProcessor     │
                              │      (Orchestrator)      │
                              └────────────┬─────────────┘
                                           │
                   ┌───────────────────────▼───────────────────────┐
                   │           Stage 1: Fast Regex Scan            │
                   │               (SecretDetector)                │
                   └───────────────────────┬───────────────────────┘
                                           │
       ┌───────────────────────────────────┼────────────────────────────────┐
       │ (No Match)                        │ (Potential Match)              │ (High-Confidence Match)
       ▼                                   ▼                                ▼
┌──────────────────────────┐  ┌──────────────────────────┐  ┌──────────────────────────┐
│      SanitizedWriter     │  │   Stage 2: LLM Analysis  │  │        (Skip LLM)        │
│  (Writes original entry) │  │       (LLMAnalyzer)      │  │  Quarantine Immediately  │
└──────────────────────────┘  └────────────┬─────────────┘  └────────────┬─────────────┘
       ▲                                   │ (LLM Confirms)              │
       │                                   ▼                             ▼
       │                      ┌──────────────────────────┐  ┌──────────────────────────┐
       │                      │     QuarantineManager    │  │      Alerting System     │
       │                      │   (DB Storage, Alerts)   │  │        (Webhooks)        │
       │                      └────────────┬─────────────┘  └──────────────────────────┘
       │                                   │
       │                                   ▼
       │                      ┌──────────────────────────┐
       └──────────────────────┤      SanitizedWriter     │
                              │  (Writes REDACTED entry) │
                              └────────────┬─────────────┘
                                           │
                                           ▼
                              [Sanitized Hypercore Log]
```

### 3. Component Implementation Plan

This plan modifies existing components and adds new ones.

#### 3.1. New Component: `core/llm_analyzer.py`

This new file will contain all logic for interacting with the Ollama instance. This isolates the dependency and makes it easy to test or swap out the LLM backend.

```python
# core/llm_analyzer.py
import requests
import json


class LLMAnalyzer:
    """Analyzes text for secrets using a local LLM via Ollama."""

    def __init__(self, endpoint: str, model: str, system_prompt: str):
        self.endpoint = endpoint
        self.model = model
        self.system_prompt = system_prompt

    def analyze(self, text: str) -> dict:
        """
        Sends text to the Ollama API for analysis and returns a structured JSON response.

        Returns:
            A dictionary like:
            {
                "secret_found": bool,
                "secret_type": str,
                "confidence_score": float,
                "severity": str
            }
            Returns a default "not found" response on error.
        """
        prompt = f"Log entry: \"{text}\"\n\nAnalyze this for secrets and respond with only the required JSON."
        payload = {
            "model": self.model,
            "system": self.system_prompt,
            "prompt": prompt,
            "format": "json",
            "stream": False
        }
        try:
            response = requests.post(self.endpoint, json=payload, timeout=15)
            response.raise_for_status()
            # The response from Ollama is a JSON string, which needs to be parsed.
            analysis = json.loads(response.json().get("response", "{}"))
            return analysis
        except (requests.exceptions.RequestException, json.JSONDecodeError) as e:
            print(f"[ERROR] LLMAnalyzer failed: {e}")
            # Fallback: If LLM fails, assume no secret was found to avoid blocking the pipeline.
            return {"secret_found": False}
```

#### 3.2. New Component: `core/sanitized_writer.py`

This component is responsible for writing to the new, sanitized hypercore log. This abstraction allows us to easily change the output destination in the future.

```python
# core/sanitized_writer.py
class SanitizedWriter:
    """Writes log entries to the sanitized sister hypercore log."""

    def __init__(self, sanitized_log_path: str):
        self.log_path = sanitized_log_path
        # Placeholder for hypercore writing logic. For now, we'll append to a file.
        self.log_file = open(self.log_path, "a")

    def write(self, log_entry: str):
        """Writes a single log entry to the sanitized stream."""
        self.log_file.write(log_entry + "\n")
        self.log_file.flush()

    def close(self):
        self.log_file.close()
```

#### 3.3. Modify: `core/detector.py`

We will enhance the `SecretDetector` to not only find matches but also redact them.

```python
# core/detector.py
import re


class SecretDetector:
    def __init__(self, patterns_file: str = "patterns.yaml"):
        # ... (load_patterns remains the same) ...
        self.patterns = self.load_patterns(patterns_file)

    def scan(self, text: str) -> list[dict]:
        """Scans text and returns a list of found secrets with metadata."""
        matches = []
        for pattern_name, pattern in self.patterns.items():
            if pattern.get("active", True):
                regex_match = re.search(pattern["regex"], text)
                if regex_match:
                    matches.append({
                        "secret_type": pattern_name,
                        "value": regex_match.group(0),
                        "confidence": pattern.get("confidence", 0.8),  # Default confidence
                        "severity": pattern.get("severity", "MEDIUM")
                    })
        return matches

    def redact(self, text: str, secret_value: str) -> str:
        """Redacts a specific secret value within a string."""
        if len(secret_value) > 8:
            redacted_str = secret_value[:4] + "****" + secret_value[-4:]
        else:
            # Short values would leak too much via prefix/suffix; mask them fully.
            redacted_str = "****"
        return text.replace(secret_value, f"[REDACTED:{redacted_str}]")
```

#### 3.4. Modify: `pipeline/processor.py`

This is the orchestrator and will see the most significant changes to implement the hybrid logic.

```python
# pipeline/processor.py
from core.hypercore_reader import HypercoreReader
from core.detector import SecretDetector
from core.llm_analyzer import LLMAnalyzer
from core.quarantine import QuarantineManager
from core.sanitized_writer import SanitizedWriter


class MessageProcessor:
    def __init__(self, reader: HypercoreReader, detector: SecretDetector, llm_analyzer: LLMAnalyzer, quarantine: QuarantineManager, writer: SanitizedWriter, llm_threshold: float):
        self.reader = reader
        self.detector = detector
        self.llm_analyzer = llm_analyzer
        self.quarantine = quarantine
        self.writer = writer
        self.llm_threshold = llm_threshold  # e.g., 0.90

    async def process_stream(self):
        """Main processing loop for the hybrid detection model."""
        async for entry in self.reader.stream_entries():
            # Stage 1: Fast Regex Scan
            regex_matches = self.detector.scan(entry.content)

            if not regex_matches:
                # No secrets found, write original entry to sanitized log
                self.writer.write(entry.content)
                continue

            # A potential secret was found. Default to sanitized, but may be quarantined.
            sanitized_content = entry.content
            should_quarantine = False
            confirmed_secret = None

            for match in regex_matches:
                # High-confidence regex matches trigger immediate quarantine, skipping LLM.
                if match['confidence'] >= self.llm_threshold:
                    should_quarantine = True
                    confirmed_secret = match
                    break  # One high-confidence match is enough

                # Stage 2: Low-confidence matches go to LLM for verification.
                llm_result = self.llm_analyzer.analyze(entry.content)
                if llm_result.get("secret_found"):
                    should_quarantine = True
                    # Prefer LLM's classification but use regex value for redaction
                    confirmed_secret = {
                        "secret_type": llm_result.get("secret_type", match['secret_type']),
                        "value": match['value'],
                        "severity": llm_result.get("severity", match['severity'])
                    }
                    break

            if should_quarantine and confirmed_secret:
                # A secret is confirmed. Redact, quarantine, and alert.
                sanitized_content = self.detector.redact(entry.content, confirmed_secret['value'])
                self.quarantine.quarantine_message(
                    message=entry,
                    secret_type=confirmed_secret['secret_type'],
                    severity=confirmed_secret['severity'],
                    redacted_content=sanitized_content
                )
                # Potentially trigger alerts here as well
                print(f"[ALERT] Confirmed secret {confirmed_secret['secret_type']} found and quarantined.")

            # Write the (potentially redacted) content to the sanitized log
            self.writer.write(sanitized_content)
```

#### 3.5. Modify: `main.py`

The main entry point will be updated to instantiate and wire together the new and modified components.

```python
# main.py
# ... imports ...
import asyncio
from core.hypercore_reader import HypercoreReader
from core.detector import SecretDetector
from core.llm_analyzer import LLMAnalyzer
from core.quarantine import QuarantineManager
from core.sanitized_writer import SanitizedWriter
from pipeline.processor import MessageProcessor
# ... other imports

def main():
    # 1. Configuration
    # Load from a new config.yaml or environment variables
    PRIMARY_LOG_PATH = "/path/to/primary/hypercore.log"
    SANITIZED_LOG_PATH = "/path/to/sanitized/hypercore.log"
    PATTERNS_PATH = "patterns.yaml"
    DB_CONNECTION = "..."
    OLLAMA_ENDPOINT = "http://localhost:11434/api/generate"
    OLLAMA_MODEL = "llama3"
    LLM_CONFIDENCE_THRESHOLD = 0.90  # Regex confidence >= this skips LLM

    with open("SHHH_SECRETS_SENTINEL_AGENT_PROMPT.md", "r") as f:
        OLLAMA_SYSTEM_PROMPT = f.read()

    # 2. Instantiation
    reader = HypercoreReader(PRIMARY_LOG_PATH)
    detector = SecretDetector(PATTERNS_PATH)
    llm_analyzer = LLMAnalyzer(OLLAMA_ENDPOINT, OLLAMA_MODEL, OLLAMA_SYSTEM_PROMPT)
    quarantine = QuarantineManager(DB_CONNECTION)
    writer = SanitizedWriter(SANITIZED_LOG_PATH)

    processor = MessageProcessor(
        reader=reader,
        detector=detector,
        llm_analyzer=llm_analyzer,
        quarantine=quarantine,
        writer=writer,
        llm_threshold=LLM_CONFIDENCE_THRESHOLD
    )

    # 3. Execution
    print("Starting SHHH Hypercore Monitor...")
    try:
        asyncio.run(processor.process_stream())
    except KeyboardInterrupt:
        print("Shutting down...")
    finally:
        writer.close()

if __name__ == "__main__":
    main()
```

### 4. Phased Rollout

1. **Phase 1: Component Implementation (1-2 days)**

   * Create `core/llm_analyzer.py` and `core/sanitized_writer.py`.
   * Write unit tests for both new components. Mock the `requests` calls for the analyzer (see the sketch after this list).
   * Update `core/detector.py` with the `redact` method and update its unit tests.
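
A minimal shape for those unit tests, as a sketch (assumes `pytest` with `unittest.mock`; test names and values are illustrative, not existing project code):

```python
# tests/test_llm_analyzer.py - sketch; requests is mocked so no Ollama is needed.
from unittest.mock import MagicMock, patch

import requests

from core.llm_analyzer import LLMAnalyzer


def make_analyzer():
    return LLMAnalyzer("http://localhost:11434/api/generate", "llama3", "system prompt")


def test_analyze_parses_ollama_json():
    fake = MagicMock()
    fake.json.return_value = {"response": '{"secret_found": true, "secret_type": "AWS_ACCESS_KEY"}'}
    with patch("core.llm_analyzer.requests.post", return_value=fake):
        result = make_analyzer().analyze("AKIAABCDEFGHIJKLMNOP spotted")
    assert result["secret_found"] is True


def test_analyze_fails_open_on_network_error():
    with patch("core.llm_analyzer.requests.post",
               side_effect=requests.exceptions.ConnectionError):
        # On any failure the analyzer must not block the pipeline.
        assert make_analyzer().analyze("a log line") == {"secret_found": False}
```
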
2. **Phase 2: Orchestration Logic (2-3 days)**

   * Implement the new logic in `pipeline/processor.py`.
   * Write integration tests for the processor that simulate the full flow: no match, low-confidence match (with mocked LLM response), and high-confidence match.
   * Update `main.py` to wire everything together.

3. **Phase 3: Configuration & Testing (1 day)**

   * Add a `config.yaml` to manage all paths, thresholds, and endpoints (a loader sketch follows this list).
   * Perform an end-to-end test run with a sample log file and a running Ollama instance.
   * Verify that the primary log is untouched, the sanitized log is created correctly (with and without redactions), and the quarantine database is populated as expected.
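
A sketch of what that `config.yaml` loader might look like (the keys are assumptions that mirror the constants in `main.py`, not a finalized schema):

```python
# config.py - hypothetical loader for the planned config.yaml.
import yaml

DEFAULTS = {
    "primary_log_path": "/path/to/primary/hypercore.log",
    "sanitized_log_path": "/path/to/sanitized/hypercore.log",
    "patterns_path": "patterns.yaml",
    "ollama_endpoint": "http://localhost:11434/api/generate",
    "ollama_model": "llama3",
    "llm_confidence_threshold": 0.90,
}


def load_config(path: str = "config.yaml") -> dict:
    """Merge config.yaml over defaults so a partial file still works."""
    try:
        with open(path, "r") as f:
            overrides = yaml.safe_load(f) or {}
    except FileNotFoundError:
        overrides = {}
    return {**DEFAULTS, **overrides}
```
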
### 5. Success Criteria

* **Zero Leaks:** The sanitized log stream contains no secrets.
* **High Accuracy:** False positive rate is demonstrably lower than a regex-only solution, verified during testing.
* **Performance:** The pipeline maintains acceptable latency (<200ms per log entry on average, accounting for occasional LLM analysis).
* **Auditability:** The primary log remains a perfect, unaltered source of truth. All detection and quarantine events are logged in the PostgreSQL database.

modules/shhh/README.md (new file, 561 lines)
@@ -0,0 +1,561 @@
🔥 This spec pushes the Sentinel design all the way to **production grade**.

---

## 📂 **1️⃣ Feedback Ingestion Spec**

This defines how curators/humans give feedback to the Sentinel so it can **update its detection rules (patterns.yaml)** safely.

---

### 🔄 **Feedback Flow**

1. **Curator/Reviewer sees alert** → marks it as:

   * `false_positive` (regex over-triggered)
   * `missed_secret` (regex failed to detect)
   * `uncertain` (needs better regex refinement)

2. **Feedback API** ingests the report:

```json
{
  "alert_id": "log_345",
  "secret_type": "AWS_ACCESS_KEY",
  "feedback_type": "false_positive",
  "evidence": "Key was dummy data: TESTKEY123",
  "suggested_regex_fix": null
}
```

3. **Meta-Learner** updates rules:

   * `false_positive` → adds **exceptions** (e.g., allowlist prefixes like `TESTKEY`).
   * `missed_secret` → drafts **new regex** from evidence (using regex generator or LLM).
   * Writes changes to **patterns.yaml** under `pending_review`.

4. **Security admin approves** before the new regex is marked `active: true`.

---

### 🧠 **Feedback Schema in YAML**

```yaml
pending_updates:
  - regex_name: AWS_ACCESS_KEY
    action: modify
    new_regex: "AKIA[0-9A-Z]{16}(?!TESTKEY)"
    confidence: 0.82
    status: "pending human review"
    submitted_by: curator_2
    timestamp: 2025-08-02T12:40:00Z
```

✅ This keeps **audit trails** & allows **safe hot updates**.
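
As a sketch of the "safe hot update" mechanics (assumes PyYAML; the helper name is hypothetical, and the field names follow the schema above):

```python
# meta_learner.py - hypothetical helper that queues a rule change for human review.
from datetime import datetime, timezone

import yaml


def queue_pending_update(path, regex_name, new_regex, confidence, submitted_by):
    """Append a pending_updates entry without touching any active rule."""
    with open(path, "r") as f:
        doc = yaml.safe_load(f) or {}
    doc.setdefault("pending_updates", []).append({
        "regex_name": regex_name,
        "action": "modify",
        "new_regex": new_regex,
        "confidence": confidence,
        "status": "pending human review",
        "submitted_by": submitted_by,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    })
    with open(path, "w") as f:
        yaml.safe_dump(doc, f, sort_keys=False)
```
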
---

## ⚙️ **2️⃣ Real AWS/GitHub Webhook Payload Templates**

These are **example POST payloads** your Sentinel would send when it detects a leaked secret.

---

### 🔐 **AWS Access Key Revocation**

**Endpoint:**
`POST https://security.example.com/hooks/aws-revoke`

**Payload:**

```json
{
  "event": "secret_leak_detected",
  "secret_type": "AWS_ACCESS_KEY",
  "redacted_key": "AKIA****XYZ",
  "log_reference": "hyperlog:58321",
  "recommended_action": "Revoke IAM access key immediately",
  "severity": "HIGH",
  "timestamp": "2025-08-02T12:45:00Z"
}
```

➡ Your security automation would call AWS CLI or IAM API:

```bash
aws iam update-access-key --access-key-id <redacted> --status Inactive
aws iam delete-access-key --access-key-id <redacted>
```

---

### 🐙 **GitHub Token Revocation**

**Endpoint:**
`POST https://security.example.com/hooks/github-revoke`

**Payload:**

```json
{
  "event": "secret_leak_detected",
  "secret_type": "GITHUB_TOKEN",
  "redacted_key": "ghp_****abcd",
  "repository": "repo-name",
  "log_reference": "hyperlog:58322",
  "severity": "HIGH",
  "recommended_action": "Invalidate GitHub token via API",
  "timestamp": "2025-08-02T12:46:00Z"
}
```

➡ This would tie into GitHub's [token-scanning API](https://docs.github.com/en/developers/overview/secret-scanning) or use PAT revocation.

---

### 💬 **Slack Token Revocation**

**Endpoint:**
`POST https://security.example.com/hooks/slack-revoke`

**Payload:**

```json
{
  "event": "secret_leak_detected",
  "secret_type": "SLACK_TOKEN",
  "redacted_key": "xoxb****hjk",
  "workspace": "company-slack",
  "log_reference": "hyperlog:58323",
  "severity": "HIGH",
  "recommended_action": "Revoke Slack bot/user token",
  "timestamp": "2025-08-02T12:47:00Z"
}
```

➡ Slack Admin API can be used to **revoke** or **rotate**.
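
One concrete option, sketched (not confirmed Sentinel code): Slack's `auth.revoke` Web API method invalidates whichever token authenticates the call, so calling it *with* the leaked token kills exactly that token:

```python
import requests


def revoke_slack_token(leaked_token: str) -> bool:
    """Best-effort revocation by authenticating auth.revoke with the leaked token."""
    resp = requests.post(
        "https://slack.com/api/auth.revoke",
        headers={"Authorization": f"Bearer {leaked_token}"},
        timeout=10,
    )
    data = resp.json()
    return bool(data.get("ok") and data.get("revoked"))
```
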
---

## 📡 **3️⃣ Redis or PostgreSQL Quarantine Store**

Switching from memory to **persistent storage** means quarantined logs survive restarts.

---

### ✅ **Redis Option (Fast, Volatile)**

```python
import redis, json
from datetime import datetime

r = redis.Redis(host='localhost', port=6379, decode_responses=True)

def quarantine_log(log_line, reason):
    entry = {"timestamp": datetime.utcnow().isoformat() + "Z", "reason": reason, "log_line": log_line}
    r.lpush("quarantine", json.dumps(entry))
    print(f"[QUARANTINE] Stored in Redis: {reason}")
```

* 🏎 **Pros:** Fast, easy to scale.
* ⚠️ **Cons:** Volatile unless persisted (RDB/AOF).

---

### ✅ **PostgreSQL Option (Auditable, Durable)**

**Schema:**

```sql
CREATE TABLE quarantine (
    id SERIAL PRIMARY KEY,
    timestamp TIMESTAMPTZ NOT NULL,
    reason TEXT NOT NULL,
    log_line TEXT NOT NULL,
    reviewed BOOLEAN DEFAULT FALSE
);
```

**Python Insert:**

```python
import psycopg2
from datetime import datetime

conn = psycopg2.connect("dbname=sentinel user=postgres password=secret")
cursor = conn.cursor()

def quarantine_log(log_line, reason):
    entry_time = datetime.utcnow().isoformat() + "Z"
    cursor.execute(
        "INSERT INTO quarantine (timestamp, reason, log_line) VALUES (%s, %s, %s)",
        (entry_time, reason, log_line)
    )
    conn.commit()
    print(f"[QUARANTINE] Stored in PostgreSQL: {reason}")
```

✅ **Postgres is better for long-term auditing** — you can run reports like:

* "How many AWS keys leaked this month?"
* "Which agents generated the most HIGH-severity quarantines?"

---

We now have:
✅ **Detection → Redaction → Quarantine → Revocation → Feedback → Pattern Evolution**
✅ **patterns.yaml** for versioned regex
✅ **Webhooks** for real-time secret revocation
✅ **Persistent quarantine store** (Redis or Postgres)

---

## 📜 **1️⃣ Migration Script: Redis → PostgreSQL**

This script will migrate existing quarantined log entries from **Redis** to **Postgres**.

```python
import redis, json, psycopg2

# Redis config
r = redis.Redis(host='localhost', port=6379, decode_responses=True)

# Postgres config
conn = psycopg2.connect("dbname=sentinel user=postgres password=secret")
cursor = conn.cursor()

def migrate_quarantine():
    count = 0
    while True:
        entry_json = r.rpop("quarantine")  # pop oldest entry from Redis
        if not entry_json:
            break
        entry = json.loads(entry_json)
        cursor.execute(
            "INSERT INTO quarantine (timestamp, reason, log_line) VALUES (%s, %s, %s)",
            (entry["timestamp"], entry["reason"], entry["log_line"])
        )
        count += 1
    conn.commit()
    print(f"[MIGRATION] Moved {count} quarantined entries from Redis → PostgreSQL")

if __name__ == "__main__":
    migrate_quarantine()
```

✅ **Run once** after Postgres is set up — empties Redis queue into the durable DB.

---

## 🖥 **2️⃣ Admin Dashboard Spec**

**Purpose:** A web UI to manage the Sentinel's security pipeline.

---

### 🎯 **Core Features**

✅ **Quarantine Browser**

* Paginated view of all quarantined logs
* Search/filter by `secret_type`, `source_agent`, `date`, `status`
* Mark quarantined logs as **reviewed** or **false alarm**

✅ **Regex Rules Manager**

* Lists all regexes from `patterns.yaml`
* Add / update / deactivate rules via UI
* Shows `pending_updates` flagged by the Meta-Learner for human approval

✅ **Revocation Status Board**

* See which secrets triggered revocations
* Status of revocation hooks (success/fail)

✅ **Metrics Dashboard**

* Charts: "Secrets Detected Over Time", "Top Sources of Leaks"
* KPIs: # HIGH severity secrets this month, # rules updated, # false positives

---

### 🏗 **Tech Stack Suggestion**

* **Backend:** FastAPI (Python)
* **Frontend:** React + Tailwind
* **DB:** PostgreSQL for quarantine + rules history
* **Auth:** OAuth (GitHub/Google) + RBAC (only security admins can approve regex changes)

---

### 🔌 **Endpoints**

```
GET  /api/quarantine        → list quarantined entries
POST /api/quarantine/review → mark entry as reviewed
GET  /api/rules             → list regex patterns
POST /api/rules/update      → update or add a regex
GET  /api/revocations       → list revocation events
```

---

### 🖥 **Mock Dashboard Layout**

* **Left Nav:** Quarantine | Rules | Revocations | Metrics
* **Main Panel:**

  * Data tables with sorting/filtering
  * Inline editors for regex rules
  * Approve/Reject buttons for pending regex updates

✅ Basically a **security control room** for Sentinel.

---

## 🤖 **3️⃣ Meta-Curator AI Prompt**

This agent reviews Sentinel's work and **tunes it automatically**.

---

### **Meta-Curator: System Prompt**

> **Role & Mission:**
> You are the **Meta-Curator**, a supervisory AI responsible for reviewing the **Secrets Sentinel's** detections, regex updates, and feedback reports.
>
> **Core Responsibilities:**
> ✅ **Audit alerts** – Look for false positives, duplicates, or missed leaks by cross-checking Sentinel outputs.
> ✅ **Review regex proposals** – When Sentinel drafts new regex rules, decide if they're:
>
> * ✅ Approved (safe to activate)
> * ❌ Rejected (too broad or incorrect)
> * 🕒 Deferred (needs human review)
>
> ✅ **Tune detection thresholds** – Adjust `confidence` or `severity` on patterns based on outcomes.
> ✅ **Generate new rules** – If multiple missed secrets share a format, draft a regex and submit to humans for approval.
> ✅ **Report upstream** – Summarize changes to security admins weekly.

---

### **Behavior Guidelines**

* **Conservative by default:** Don't auto-approve regexes unless confidence > 0.95 (a decision sketch follows this list).
* **Keep auditability:** Every decision (approve/reject) is logged in the hyperlog.
* **Respect human overrides:** Never overwrite a regex that a human explicitly locked.
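
A minimal sketch of that decision rule (illustrative only; real curation would also inspect the proposed regex itself):

```python
def curate(proposal: dict, locked_rules: set) -> str:
    """Map a Sentinel regex proposal to 'approve' or 'defer' per the guidelines above."""
    if proposal["regex_name"] in locked_rules:
        return "defer"  # human-locked rules are never overwritten
    if proposal.get("confidence", 0.0) > 0.95:
        return "approve"
    return "defer"  # conservative default: route to human review
```
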
---

### **Example Meta-Curator Output**

```json
{
  "action": "approve_regex",
  "regex_name": "GITLAB_TOKEN",
  "regex_pattern": "glpat-[0-9A-Za-z\\-_]{20}",
  "confidence": 0.97,
  "decision_reason": "Validated against 12 quarantined examples, no false positives found.",
  "timestamp": "2025-08-02T13:45:00Z"
}
```

✅ This meta-agent is the **brains of the rules layer** — keeps Sentinel evolving, but under control.

---

## 🚀 **Now You Have:**

✅ **Migration Path** → Redis → PostgreSQL
✅ **Admin Dashboard Spec** → complete with endpoints & layout
✅ **Meta-Curator Prompt** → the agent that "manages the manager"

The next batch below locks this into a **real, buildable system**.

---

## 📂 **1️⃣ `patterns_history` Table Schema**

This tracks **every regex change** ever made — who/what made it, why, and when.

```sql
CREATE TABLE patterns_history (
    id SERIAL PRIMARY KEY,
    regex_name TEXT NOT NULL,
    old_regex TEXT,
    new_regex TEXT,
    action TEXT CHECK (action IN ('add', 'update', 'remove')),
    confidence NUMERIC(3,2),
    status TEXT CHECK (status IN ('approved', 'pending', 'rejected')),
    submitted_by TEXT NOT NULL,
    approved_by TEXT,
    decision_reason TEXT,
    timestamp TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);
```

### ✅ What this gives you:

* **Full audit trail** (critical for security compliance).
* You can run queries like the ones sketched below:

  * *"Show all regex changes made by Meta-Curator vs. humans."*
  * *"List all rules rejected in the last 90 days."*
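
For instance, against the schema above:

```sql
-- Regex changes by submitter (Meta-Curator vs. humans)
SELECT submitted_by, action, COUNT(*) AS changes
FROM patterns_history
GROUP BY submitted_by, action;

-- Rules rejected in the last 90 days
SELECT regex_name, decision_reason, timestamp
FROM patterns_history
WHERE status = 'rejected'
  AND timestamp > NOW() - INTERVAL '90 days'
ORDER BY timestamp DESC;
```
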
---

## 🖼 **2️⃣ Admin Dashboard Wireframes**

**Goal:** show your devs exactly what to build — no ambiguity.

---

### **🔒 Dashboard Home**

```
------------------------------------------------------
| [Sentinel Logo]  Secrets Sentinel Dashboard        |
------------------------------------------------------
| Quarantine | Rules | Revocations | Metrics | Admin |
------------------------------------------------------
| Welcome back, Security Admin!                      |
|                                                    |
| ▢ 32 Quarantined logs waiting review               |
| ▢ 4 Pending regex updates                          |
| ▢ 2 Failed revocation hooks                        |
------------------------------------------------------
```

---

### **🗄 Quarantine View**

```
------------------------------------------------------
| Quarantine Logs                                    |
------------------------------------------------------
| Search: [______________]  [Filter ▼]               |
------------------------------------------------------
| Log ID   | Secret Type    | Severity | Status      |
------------------------------------------------------
| log_4287 | AWS_ACCESS_KEY | HIGH     | PENDING     |
| log_4288 | JWT            | MEDIUM   | REVIEWED    |
| log_4289 | SSH_KEY        | HIGH     | PENDING     |
------------------------------------------------------
[ View Details ] [ Mark as Reviewed ] [ Delete ]
```

**Clicking "View Details" →** shows full log snippet (with redacted secret).

---

### **📜 Regex Manager**

```
------------------------------------------------------
| Regex Rules                                        |
------------------------------------------------------
| Name            | Regex Pattern                  | Active |
------------------------------------------------------
| AWS_ACCESS_KEY  | AKIA[0-9A-Z]{16}               | ✔      |
| JWT             | eyJ[A-Za-z0-9_-]+?\.[…]        | ✔      |
| SLACK_TOKEN     | xox[baprs]-[0-9A-Za-z-]{10,48} | ✔      |
------------------------------------------------------
[ Add New Regex ] [ View History ]
```

Clicking **View History** → pulls from `patterns_history`.

---

### **📊 Metrics View**

* **Line Chart:** "Secrets Detected Over Time"
* **Bar Chart:** "Secrets by Type" (AWS, GitHub, JWT, etc.)
* **KPIs:**

  * 🔴 High Severity Leaks: 12 this week
  * 🟢 Regex Accuracy: 94%

---

## ⚙️ **3️⃣ FastAPI Skeleton**

Here's the **starter code** for your dev team to run with.

```python
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
import psycopg2
import yaml

app = FastAPI(title="Secrets Sentinel Dashboard API")

# --- Database Setup ---
conn = psycopg2.connect("dbname=sentinel user=postgres password=secret")
cursor = conn.cursor()

# --- Models ---
class QuarantineEntry(BaseModel):
    id: int
    timestamp: str
    reason: str
    log_line: str
    reviewed: bool

class RegexRule(BaseModel):
    regex_name: str
    regex_pattern: str
    severity: str
    confidence: float
    active: bool

# --- Endpoints ---
@app.get("/quarantine", response_model=List[QuarantineEntry])
def get_quarantine():
    cursor.execute("SELECT id, timestamp, reason, log_line, reviewed FROM quarantine")
    rows = cursor.fetchall()
    return [QuarantineEntry(id=r[0], timestamp=str(r[1]), reason=r[2], log_line=r[3], reviewed=r[4]) for r in rows]

@app.post("/quarantine/review/{entry_id}")
def review_quarantine(entry_id: int):
    cursor.execute("UPDATE quarantine SET reviewed=true WHERE id=%s", (entry_id,))
    conn.commit()
    return {"status": "ok", "message": f"Quarantine entry {entry_id} marked reviewed"}

@app.get("/rules", response_model=List[RegexRule])
def get_rules():
    # Load active rules from patterns.yaml (parse it as YAML, not JSON)
    with open("patterns.yaml", "r") as f:
        patterns = yaml.safe_load(f) or {}
    rules = []
    for name, rule in patterns.get("patterns", {}).items():
        rules.append(RegexRule(
            regex_name=name,
            regex_pattern=rule["regex"],
            severity=rule["severity"],
            confidence=rule["confidence"],
            active=rule["active"]
        ))
    return rules

@app.post("/rules/update")
def update_rule(rule: RegexRule):
    # Append to patterns_history table
    cursor.execute("""
        INSERT INTO patterns_history (regex_name, old_regex, new_regex, action, confidence, status, submitted_by)
        VALUES (%s, %s, %s, 'update', %s, 'pending', 'admin')
    """, (rule.regex_name, None, rule.regex_pattern, rule.confidence))
    conn.commit()
    return {"status": "ok", "message": f"Regex {rule.regex_name} queued for update"}
```

✅ **Why this skeleton works:**

* REST endpoints for **Quarantine**, **Rules**, **History**.
* Uses **Postgres for persistence**.
* Reads from `patterns.yaml` for active rules.

---

## 🚀 **Now You Have:**

✅ A **Postgres schema** for regex change history.
✅ **Wireframes** for the admin dashboard.
✅ A **FastAPI skeleton** your team can expand into a full API/UI stack.

modules/shhh/SHHH_HYPERCORE_MONITOR_PLAN.md (new file, 512 lines)
@@ -0,0 +1,512 @@
# 🔒 SHHH Hypercore Log Monitor - Implementation Plan

## Executive Summary

This plan outlines the creation of a Python application that monitors our hypercore log to ensure no secrets are leaked in BZZZ messages, based on the SHHH module's secrets detection framework.

## Project Overview

### Objective
Create a real-time monitoring system that:
- Monitors hypercore log entries for secret patterns
- Detects potential secrets in BZZZ P2P messages before they propagate
- Quarantines suspicious entries and triggers automatic remediation
- Provides audit trails and security dashboard for compliance

### Architecture Integration
- **Hypercore Log**: Source of truth for all CHORUS Services events
- **BZZZ Network**: P2P messaging layer that could inadvertently transmit secrets
- **SHHH Module**: Existing secrets detection framework and patterns
- **Monitoring App**: New Python application bridging these systems

## Technical Requirements

### 1. Hypercore Log Integration

Real-time log monitoring:

- Stream hypercore entries as they're written
- Parse BZZZ message payloads for secret patterns
- Filter for message types that could contain secrets
- Handle log rotation and recovery scenarios

### 2. Secret Detection Engine
Based on SHHH's `patterns.yaml` framework:
```yaml
patterns:
  AWS_ACCESS_KEY:
    regex: "AKIA[0-9A-Z]{16}"
    severity: "HIGH"
    confidence: 0.95
    active: true
  GITHUB_TOKEN:
    regex: "ghp_[0-9A-Za-z]{36}"
    severity: "HIGH"
    confidence: 0.92
    active: true
  PRIVATE_KEY:
    regex: "-----BEGIN [A-Z ]*PRIVATE KEY-----"
    severity: "CRITICAL"
    confidence: 0.98
    active: true
```
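
A sketch of the loader side of this framework (assumes PyYAML; the `load_patterns` name matches the detector below):

```python
# Minimal patterns.yaml loader sketch.
import yaml


def load_patterns(path: str = "patterns.yaml") -> dict:
    """Return the patterns mapping; inactive rules are filtered at scan time."""
    with open(path, "r") as f:
        data = yaml.safe_load(f) or {}
    return data.get("patterns", {})
```
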
### 3. Quarantine & Response System
- **Immediate**: Block message propagation in BZZZ network
- **Log**: Store quarantined entries in PostgreSQL
- **Alert**: Notify security team via webhooks
- **Revoke**: Trigger automatic secret revocation APIs

## Implementation Architecture

### Phase 1: Core Monitoring System (Weeks 1-2)

#### 1.1 Hypercore Log Reader
```python
# /shhh-monitor/core/hypercore_reader.py
class HypercoreReader:
    def __init__(self, log_path: str):
        self.log_path = log_path
        self.position = 0

    def stream_entries(self) -> Iterator[LogEntry]:
        """Stream new hypercore entries in real-time"""
        # Tail-like functionality with inotify
        # Parse hypercore binary format
        # Yield structured LogEntry objects

    def parse_bzzz_message(self, entry: LogEntry) -> Optional[BzzzMessage]:
        """Extract BZZZ message payload from hypercore entry"""
        # Decode BZZZ message format
        # Extract message content and metadata
        # Return structured message or None
```
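
The "tail-like functionality" above can be prototyped with a simple polling follow loop (a sketch that assumes a line-oriented log file; the real hypercore binary format would need the parsing noted in the comments):

```python
import time


def follow(path: str):
    """Yield new lines appended to a file, tail -f style (polling, not inotify)."""
    with open(path, "r") as f:
        f.seek(0, 2)  # start at end of file so only new entries are seen
        while True:
            line = f.readline()
            if not line:
                time.sleep(0.2)  # no new data yet; back off briefly
                continue
            yield line.rstrip("\n")
```
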
|
||||
|
||||
#### 1.2 Secret Detection Engine
|
||||
```python
|
||||
# /shhh-monitor/core/detector.py
|
||||
class SecretDetector:
|
||||
def __init__(self, patterns_file: str = "patterns.yaml"):
|
||||
self.patterns = self.load_patterns(patterns_file)
|
||||
|
||||
def scan_message(self, message: BzzzMessage) -> List[SecretMatch]:
|
||||
"""Scan BZZZ message for secret patterns"""
|
||||
matches = []
|
||||
for pattern_name, pattern in self.patterns.items():
|
||||
if pattern["active"]:
|
||||
matches.extend(self.apply_regex(message, pattern))
|
||||
return matches
|
||||
|
||||
def redact_secret(self, text: str, match: SecretMatch) -> str:
|
||||
"""Redact detected secret while preserving context"""
|
||||
# Replace secret with asterisks, keep first/last chars
|
||||
# Maintain log readability for analysis
|
||||
```
|
||||
|
||||
#### 1.3 Quarantine System
|
||||
```python
|
||||
# /shhh-monitor/core/quarantine.py
|
||||
class QuarantineManager:
|
||||
def __init__(self, db_connection: str):
|
||||
self.db = psycopg2.connect(db_connection)
|
||||
|
||||
def quarantine_message(self, message: BzzzMessage, matches: List[SecretMatch]):
|
||||
"""Store quarantined message and block propagation"""
|
||||
# Insert into quarantine table
|
||||
# Generate alert payload
|
||||
# Trigger BZZZ network block
|
||||
|
||||
def send_alert(self, severity: str, secret_type: str, redacted_content: str):
|
||||
"""Send webhook alerts for detected secrets"""
|
||||
# POST to security webhook endpoints
|
||||
# Different payloads for AWS, GitHub, Slack tokens
|
||||
# Include revocation recommendations
|
||||
```
|
||||
|
||||
### Phase 2: BZZZ Network Integration (Weeks 3-4)
|
||||
|
||||
#### 2.1 BZZZ Message Interceptor
|
||||
```python
|
||||
# /shhh-monitor/integrations/bzzz_interceptor.py
|
||||
class BzzzInterceptor:
|
||||
def __init__(self, bzzz_config: Dict):
|
||||
self.bzzz_client = BzzzClient(bzzz_config)
|
||||
|
||||
def install_message_hook(self):
|
||||
"""Install pre-send hook in BZZZ network layer"""
|
||||
# Intercept messages before P2P transmission
|
||||
# Scan with SecretDetector
|
||||
# Block or allow message propagation
|
||||
|
||||
def block_message(self, message_id: str, reason: str):
|
||||
"""Prevent message from propagating in P2P network"""
|
||||
# Mark message as blocked in BZZZ
|
||||
# Log blocking reason
|
||||
# Notify sender agent of security violation
|
||||
```
|
||||
|
||||
#### 2.2 Real-time Processing Pipeline
|
||||
```python
|
||||
# /shhh-monitor/pipeline/processor.py
|
||||
class MessageProcessor:
|
||||
def __init__(self, detector: SecretDetector, quarantine: QuarantineManager):
|
||||
self.detector = detector
|
||||
self.quarantine = quarantine
|
||||
|
||||
async def process_hypercore_stream(self):
|
||||
"""Main processing loop for hypercore monitoring"""
|
||||
async for entry in self.hypercore_reader.stream_entries():
|
||||
if bzzz_message := self.parse_bzzz_message(entry):
|
||||
matches = self.detector.scan_message(bzzz_message)
|
||||
if matches:
|
||||
await self.handle_secret_detection(bzzz_message, matches)
|
||||
|
||||
async def handle_secret_detection(self, message: BzzzMessage, matches: List[SecretMatch]):
|
||||
"""Handle detected secrets with appropriate response"""
|
||||
# Determine severity level
|
||||
# Quarantine message
|
||||
# Send alerts
|
||||
# Trigger revocation if needed
|
||||
# Update detection statistics
|
||||
```

### Phase 3: Admin Dashboard & Feedback Loop (Weeks 5-6)

#### 3.1 FastAPI Backend
```python
# /shhh-monitor/api/main.py
from typing import List

from fastapi import FastAPI, Depends
from .models import QuarantineEntry, SecretPattern, RevocationEvent

app = FastAPI(title="SHHH Hypercore Monitor API")

@app.get("/quarantine", response_model=List[QuarantineEntry])
async def get_quarantine_entries():
    """List all quarantined messages"""

@app.post("/quarantine/{entry_id}/review")
async def review_quarantine_entry(entry_id: int, action: str):
    """Mark quarantine entry as reviewed/false positive"""

@app.get("/patterns", response_model=List[SecretPattern])
async def get_detection_patterns():
    """List all secret detection patterns"""

@app.post("/patterns/{pattern_name}/update")
async def update_pattern(pattern_name: str, pattern: SecretPattern):
    """Update regex pattern based on feedback"""
```
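
Once the stubs above are filled in, the review endpoint can be exercised with any HTTP client; a hedged example using `httpx` against a local server (the URL and entry ID are illustrative):

```python
# Illustrative client call; FastAPI treats the `action` argument of the
# stub above as a query parameter.
import httpx

resp = httpx.post(
    "http://localhost:8000/quarantine/42/review",
    params={"action": "false_positive"},
)
print(resp.status_code, resp.json())
```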

#### 3.2 React Dashboard Frontend
```typescript
// /shhh-monitor/dashboard/src/components/QuarantineDashboard.tsx
interface QuarantineDashboard {
  // Real-time quarantine feed
  // Pattern management interface
  // Revocation status tracking
  // Security metrics and charts
  // Alert configuration
}
```

### Phase 4: Automated Response & Learning (Weeks 7-8)

#### 4.1 Automated Secret Revocation
```python
# /shhh-monitor/automation/revocation.py
import boto3    # AWS SDK
import github   # PyGithub
import slack    # Slack web client (slackclient / slack_sdk)

class SecretRevoker:
    def __init__(self):
        self.aws_client = boto3.client('iam')
        self.github_client = github.Github()
        self.slack_client = slack.WebClient()

    async def revoke_aws_key(self, access_key_id: str):
        """Automatically deactivate AWS access key"""
        self.aws_client.update_access_key(
            AccessKeyId=access_key_id,
            Status='Inactive'
        )

    async def revoke_github_token(self, token: str):
        """Revoke GitHub personal access token"""
        # Use GitHub's token scanning API
        # Or organization webhook for automatic revocation

    async def revoke_slack_token(self, token: str):
        """Revoke Slack bot/user token"""
        # Use Slack Admin API
        # Invalidate token and rotate if possible
```

#### 4.2 Meta-Learning System
```python
# /shhh-monitor/learning/meta_curator.py
class MetaCurator:
    def __init__(self, llm_client):
        self.llm = llm_client

    async def analyze_false_positives(self, entries: List[QuarantineEntry]):
        """Use LLM to improve regex patterns"""
        # Analyze patterns in false positives
        # Generate regex refinements
        # Submit for human approval

    async def detect_new_secret_types(self, quarantine_history: List[QuarantineEntry]):
        """Identify new types of secrets to detect"""
        # Look for patterns in undetected secrets
        # Generate new regex proposals
        # Calculate confidence scores
```

## Database Schema

### Core Tables
```sql
-- Quarantined messages
CREATE TABLE quarantine (
    id SERIAL PRIMARY KEY,
    timestamp TIMESTAMPTZ NOT NULL,
    hypercore_position BIGINT NOT NULL,
    bzzz_message_id TEXT NOT NULL,
    secret_type TEXT NOT NULL,
    severity TEXT CHECK (severity IN ('LOW', 'MEDIUM', 'HIGH', 'CRITICAL')),
    confidence NUMERIC(3,2),
    redacted_content TEXT NOT NULL,
    full_content_hash TEXT NOT NULL, -- For audit purposes
    reviewed BOOLEAN DEFAULT FALSE,
    review_action TEXT, -- 'false_positive', 'confirmed', 'uncertain'
    reviewer TEXT,
    review_timestamp TIMESTAMPTZ
);

-- Pattern history and evolution
CREATE TABLE patterns_history (
    id SERIAL PRIMARY KEY,
    pattern_name TEXT NOT NULL,
    old_regex TEXT,
    new_regex TEXT,
    action TEXT CHECK (action IN ('add', 'update', 'remove')),
    confidence NUMERIC(3,2),
    status TEXT CHECK (status IN ('approved', 'pending', 'rejected')),
    submitted_by TEXT NOT NULL, -- 'human', 'meta_curator', 'feedback_system'
    approved_by TEXT,
    decision_reason TEXT,
    timestamp TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);

-- Revocation events tracking
CREATE TABLE revocations (
    id SERIAL PRIMARY KEY,
    quarantine_id INTEGER REFERENCES quarantine(id),
    secret_type TEXT NOT NULL,
    revocation_method TEXT NOT NULL, -- 'aws_api', 'github_api', 'manual'
    status TEXT CHECK (status IN ('success', 'failed', 'pending')),
    response_data JSONB, -- API response details
    timestamp TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);

-- Performance metrics
CREATE TABLE detection_metrics (
    id SERIAL PRIMARY KEY,
    date DATE NOT NULL,
    total_messages_scanned INTEGER,
    secrets_detected INTEGER,
    false_positives INTEGER,
    patterns_updated INTEGER,
    avg_detection_latency_ms INTEGER
);
```
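
The `detection_metrics` table makes accuracy tracking a simple aggregation; a minimal sketch (assuming `psycopg2` and the schema above; the DSN is illustrative) that computes the false-positive rate for a given day:

```python
# Minimal sketch: daily false-positive rate from detection_metrics.
import psycopg2

def daily_false_positive_rate(dsn: str, day: str) -> float:
    """Return false_positives / secrets_detected for a date ('YYYY-MM-DD')."""
    with psycopg2.connect(dsn) as conn, conn.cursor() as cur:
        cur.execute(
            """
            SELECT COALESCE(SUM(false_positives), 0),
                   COALESCE(SUM(secrets_detected), 0)
            FROM detection_metrics
            WHERE date = %s
            """,
            (day,),
        )
        fp, detected = cur.fetchone()
    return fp / detected if detected else 0.0
```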

## Security Considerations

### 1. Secure Secret Storage
- **Never store actual secrets** in quarantine database
- Use **cryptographic hashes** for audit trails (see the sketch after this list)
- **Redact sensitive content** while preserving detection context
- Implement **secure deletion** for expired quarantine entries
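
A minimal sketch of the redact-and-hash approach, using Python's standard `hashlib`; the keep-a-few-characters masking rule is an illustrative choice, not a specification:

```python
# Minimal sketch: redact a detected secret but keep an auditable hash.
import hashlib

def redact_and_hash(content: str, secret: str) -> tuple[str, str]:
    """Replace the secret with a masked form and return (redacted, sha256)."""
    masked = secret[:4] + "****" + secret[-3:] if len(secret) > 10 else "****"
    redacted = content.replace(secret, masked)
    digest = hashlib.sha256(content.encode("utf-8")).hexdigest()
    return redacted, digest
```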

### 2. Access Control
- **Role-based access** to dashboard (security admin, reviewer, read-only)
- **Audit logging** for all administrative actions
- **OAuth integration** with existing identity provider
- **API key authentication** for automated systems

### 3. Network Security
- **TLS encryption** for all API communication
- **VPN/private network** access to monitoring systems
- **Rate limiting** to prevent API abuse
- **IP allowlisting** for critical endpoints

## Deployment Architecture

### Development Environment
```yaml
# docker-compose.dev.yml
services:
  shhh-monitor:
    build: .
    environment:
      - DATABASE_URL=postgresql://dev:dev@postgres:5432/shhh_dev
      - HYPERCORE_LOG_PATH=/data/hypercore.log
      - BZZZ_CONFIG_PATH=/config/bzzz.yaml
    volumes:
      - ./data:/data
      - ./config:/config

  postgres:
    image: postgres:15
    environment:
      POSTGRES_DB: shhh_dev
      POSTGRES_USER: dev
      POSTGRES_PASSWORD: dev

  redis:
    image: redis:7-alpine
    # For caching and real-time notifications
```
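
With this file in place, the development stack can be brought up with `docker compose -f docker-compose.dev.yml up -d` and torn down again with `docker compose -f docker-compose.dev.yml down`.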

### Production Deployment
```yaml
# docker-compose.prod.yml
services:
  shhh-monitor:
    image: registry.home.deepblack.cloud/tony/shhh-monitor:latest
    deploy:
      replicas: 2
      placement:
        constraints:
          - node.role == manager
    environment:
      - DATABASE_URL=postgresql://shhh:${SHHH_DB_PASSWORD}@postgres:5432/shhh_prod
      - HYPERCORE_LOG_PATH=/hypercore/current.log
    networks:
      - shhh_network
      - tengig # For dashboard access
```

## Performance Requirements

### Latency Targets
- **Log Processing**: <50ms per hypercore entry
- **Secret Detection**: <10ms per BZZZ message
- **Alert Generation**: <100ms for critical secrets
- **Dashboard Response**: <200ms for UI queries

### Throughput Targets
- **Message Scanning**: 1000 messages/second
- **Concurrent Users**: 10+ dashboard users
- **Alert Volume**: 100+ alerts/hour during peak
- **Database Queries**: <5ms average response time

## Monitoring & Observability

### Metrics Collection
```python
# Prometheus metrics
from prometheus_client import Counter, Gauge, Histogram

messages_scanned_total = Counter('shhh_messages_scanned_total', 'Total messages scanned')
secrets_detected_total = Counter('shhh_secrets_detected_total', 'Secrets detected', ['secret_type', 'severity'])
detection_latency = Histogram('shhh_detection_latency_seconds', 'Secret detection latency')
quarantine_size = Gauge('shhh_quarantine_entries_total', 'Current quarantine entries')
```
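
A hedged example of how these metrics might be updated inside the scan loop (`detector` and `message` stand in for the components defined earlier):

```python
# Illustrative instrumentation of a single scan; names are placeholders.
with detection_latency.time():
    matches = detector.scan_message(message)
messages_scanned_total.inc()
for m in matches:
    secrets_detected_total.labels(secret_type=m.secret_type, severity=m.severity).inc()
```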

### Health Checks
- **Hypercore connectivity**: Verify log file access
- **Database health**: Connection pool status
- **BZZZ integration**: P2P network connectivity
- **Alert system**: Webhook endpoint validation

### Logging Strategy
```python
# Structured logging with correlation IDs
{
    "timestamp": "2025-08-02T13:45:00Z",
    "level": "WARNING",
    "event": "secret_detected",
    "correlation_id": "req_123",
    "secret_type": "AWS_ACCESS_KEY",
    "severity": "HIGH",
    "hypercore_position": 58321,
    "bzzz_message_id": "msg_abc123",
    "redacted_content": "AKIA****XYZ found in agent message"
}
```

## Testing Strategy

### Unit Tests
- **Regex pattern validation**: Test against known secret formats (see the sketch after this list)
- **Message parsing**: Verify hypercore and BZZZ format handling
- **Quarantine logic**: Test storage and retrieval functions
- **Alert generation**: Mock webhook endpoint testing
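
A minimal pytest sketch for the first item; the sample key is AWS's documented example value, not a live credential:

```python
# Minimal pytest sketch for regex pattern validation.
import re

AWS_ACCESS_KEY = re.compile(r"AKIA[0-9A-Z]{16}")

def test_aws_key_pattern_matches_known_format():
    assert AWS_ACCESS_KEY.search("key=AKIAIOSFODNN7EXAMPLE")

def test_aws_key_pattern_ignores_short_strings():
    assert AWS_ACCESS_KEY.search("AKIA123") is None
```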

### Integration Tests
- **End-to-end workflow**: Log → Detection → Quarantine → Alert
- **Database operations**: PostgreSQL CRUD operations
- **BZZZ integration**: Message interception and blocking
- **API endpoints**: FastAPI route testing

### Security Tests
- **Input validation**: SQL injection, XSS prevention
- **Access control**: Role-based permission testing
- **Data protection**: Verify secret redaction and hashing
- **Performance**: Load testing with high message volume

## Rollout Plan

### Phase 1: Foundation (Weeks 1-2)
- ✅ Core monitoring system with hypercore integration
- ✅ Basic secret detection using SHHH patterns
- ✅ PostgreSQL quarantine storage
- ✅ Simple alerting via webhooks

### Phase 2: Integration (Weeks 3-4)
- ✅ BZZZ network message interception
- ✅ Real-time processing pipeline
- ✅ Enhanced pattern management
- ✅ Performance optimization

### Phase 3: Dashboard (Weeks 5-6)
- ✅ FastAPI backend with full CRUD operations
- ✅ React dashboard for quarantine management
- ✅ Pattern editor and approval workflow
- ✅ Security metrics and reporting

### Phase 4: Automation (Weeks 7-8)
- ✅ Automated secret revocation APIs
- ✅ Meta-learning system for pattern improvement
- ✅ Production deployment and monitoring
- ✅ Documentation and team training

## Success Criteria

### Security Effectiveness
- **Zero secret leaks** in BZZZ P2P network after deployment
- **<1% false positive rate** for secret detection
- **<30 seconds** average time to detect and quarantine secrets
- **99.9% uptime** for monitoring system

### Operational Excellence
- **Complete audit trail** for all security events
- **Self-improving** pattern detection through feedback
- **Scalable architecture** supporting growth in CHORUS usage
- **Team adoption** with trained security administrators

## Risk Mitigation

### Technical Risks
- **Performance impact**: Monitor hypercore processing overhead
- **False positives**: Implement feedback loop for pattern refinement
- **BZZZ integration**: Maintain compatibility with P2P protocol evolution
- **Data loss**: Back up the quarantine database and implement recovery procedures

### Security Risks
- **Bypassing detection**: Regular pattern updates and meta-learning
- **System compromise**: Network isolation and access controls
- **Secret exposure**: Implement proper redaction and audit procedures
- **Alert fatigue**: Tune detection thresholds to minimize noise

## Conclusion

This SHHH Hypercore Log Monitor provides comprehensive protection against secret leakage in the CHORUS Services BZZZ P2P network. By implementing real-time detection, automated response, and continuous learning, we ensure that sensitive information remains secure while maintaining the performance and functionality of the distributed AI orchestration platform.

The system builds upon the existing SHHH framework while adding the specific hypercore and BZZZ integrations needed for CHORUS Services. The phased rollout ensures stability and allows for iterative improvement based on real-world usage patterns.
251
modules/shhh/SHHH_SECRETS_SENTINEL_AGENT_PROMPT.md
Normal file
@@ -0,0 +1,251 @@
# 🛡️ CHORUS Services Secrets Sentinel Agent - System Prompt

## Agent Role & Mission

You are the **Secrets Sentinel**, a specialized security agent responsible for monitoring the CHORUS Services hypercore log and BZZZ P2P network messages to detect, quarantine, and prevent the leakage of sensitive credentials and secrets.

## Core Responsibilities

### 🔍 **Detection & Analysis**
- **Real-time Log Monitoring**: Continuously scan hypercore log entries for secret patterns
- **BZZZ Message Inspection**: Analyze P2P messages before they propagate across the network
- **Pattern Recognition**: Apply sophisticated regex patterns to identify various secret types
- **Context Analysis**: Understand the context around detected patterns to minimize false positives

### 🚨 **Immediate Response Actions**
- **Redaction**: Immediately redact detected secrets while preserving log context
- **Quarantine**: Isolate HIGH severity log entries from normal processing
- **Network Blocking**: Prevent BZZZ messages containing secrets from propagating
- **Alert Generation**: Send immediate notifications to the security team

### 🔄 **Automated Remediation**
- **Revocation Triggers**: Automatically trigger webhook-based secret revocation
- **API Integration**: Interface with AWS, GitHub, and Slack APIs for immediate credential deactivation
- **Audit Trail**: Maintain complete records of all detection and remediation actions

### 🧠 **Adaptive Learning**
- **Pattern Evolution**: Update detection rules based on feedback and new secret types
- **False Positive Reduction**: Refine patterns based on curator feedback
- **Confidence Scoring**: Assign confidence levels to detections for proper escalation

## Detection Patterns & Rules

### **High Severity Secrets (Immediate Quarantine + Revocation)**
```yaml
AWS_ACCESS_KEY:
  regex: "AKIA[0-9A-Z]{16}"
  severity: "CRITICAL"
  confidence: 0.95
  action: "quarantine_and_revoke"

PRIVATE_KEY:
  regex: "-----BEGIN [A-Z ]*PRIVATE KEY-----"
  severity: "CRITICAL"
  confidence: 0.98
  action: "quarantine_and_revoke"

GITHUB_TOKEN:
  regex: "ghp_[0-9A-Za-z]{36}"
  severity: "HIGH"
  confidence: 0.92
  action: "quarantine_and_revoke"
```

### **Medium Severity Secrets (Quarantine + Alert)**
```yaml
JWT_TOKEN:
  regex: "eyJ[A-Za-z0-9_-]+?\\.[A-Za-z0-9_-]+?\\.[A-Za-z0-9_-]+?"
  severity: "MEDIUM"
  confidence: 0.85
  action: "quarantine_and_alert"

SLACK_TOKEN:
  regex: "xox[baprs]-[0-9A-Za-z-]{10,48}"
  severity: "HIGH"
  confidence: 0.90
  action: "quarantine_and_revoke"
```
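
For illustration, a minimal sketch of loading these YAML rules and applying them to text; it assumes PyYAML and the entry structure shown above (the file name matches `patterns_file` in the configuration later in this commit):

```python
# Minimal sketch: load the YAML rules above and scan text with them.
import re
import yaml

def load_patterns(path: str = "patterns.yaml") -> dict:
    with open(path) as f:
        raw = yaml.safe_load(f)
    return {name: {**cfg, "compiled": re.compile(cfg["regex"])}
            for name, cfg in raw.items()}

def scan(text: str, patterns: dict) -> list:
    """Return one record per pattern hit, carrying severity and confidence."""
    hits = []
    for name, cfg in patterns.items():
        for match in cfg["compiled"].finditer(text):
            hits.append({
                "secret_type": name,
                "severity": cfg["severity"],
                "confidence": cfg["confidence"],
                "span": match.span(),
            })
    return hits
```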

## Behavioral Guidelines

### **Detection Behavior**
1. **Scan Every Log Entry**: Process all hypercore entries in real-time
2. **Parse BZZZ Messages**: Extract and analyze P2P message payloads
3. **Apply Pattern Matching**: Use confidence-weighted regex patterns
4. **Context Preservation**: Maintain enough context for security analysis

### **Response Behavior**
1. **Immediate Action**: For CRITICAL/HIGH severity, act within seconds
2. **Graduated Response**: Take different actions based on severity level
3. **Human Escalation**: Flag uncertain cases for human review
4. **Audit Everything**: Log all actions with timestamps and reasons

### **Learning Behavior**
1. **Accept Feedback**: Process curator reports of false positives and missed secrets
2. **Pattern Refinement**: Propose regex updates based on feedback
3. **Version Control**: Track all pattern changes with confidence scores
4. **Human Approval**: Submit new patterns for security admin approval

## Operational Procedures

### **Log Entry Processing Workflow**
```
1. Receive hypercore log entry
2. Parse entry structure and extract content
3. If BZZZ message → extract P2P payload
4. Apply all active regex patterns
5. Calculate confidence scores
6. Determine severity level
7. Execute appropriate response action
8. Log detection event and actions taken
```
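
A hedged sketch of the eight steps as one function; `parse_bzzz_message`, `execute_response`, and `log_detection_event` are placeholders for components described elsewhere in this prompt:

```python
# Minimal sketch of the workflow above; helper names are placeholders.
SEVERITY_ORDER = ["LOW", "MEDIUM", "HIGH", "CRITICAL"]

def process_log_entry(entry, patterns: dict) -> None:
    content = entry.content                         # 1-2: receive and parse
    if getattr(entry, "is_bzzz_message", False):    # 3: extract P2P payload
        content = parse_bzzz_message(entry)
    hits = scan(content, patterns)                  # 4-5: match + confidence
    if not hits:
        return
    severity = max((h["severity"] for h in hits),   # 6: highest severity wins
                   key=SEVERITY_ORDER.index)
    execute_response(entry, hits, severity)         # 7: quarantine/alert/revoke
    log_detection_event(entry, hits, severity)      # 8: audit trail
```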

### **Quarantine Procedure**
```python
import hashlib
from datetime import datetime

def quarantine_log_entry(entry, secret_type, confidence):
    """Quarantine sensitive log entry for security review"""
    redacted_content = redact_secrets(entry.content)
    quarantine_record = {
        "timestamp": datetime.utcnow().isoformat() + "Z",
        "hypercore_position": entry.position,
        "secret_type": secret_type,
        "severity": determine_severity(secret_type),
        "confidence": confidence,
        "redacted_content": redacted_content,
        # Cryptographic hash for the audit trail (not Python's built-in hash())
        "content_hash": hashlib.sha256(entry.content.encode()).hexdigest(),
        "source_agent": entry.source_agent,
        "reason": f"Secret detected: {secret_type}"
    }
    store_in_quarantine_db(quarantine_record)
    if entry.is_bzzz_message:
        block_bzzz_propagation(entry.message_id)
    return quarantine_record
```

### **Revocation Trigger Procedure**
```python
def trigger_secret_revocation(secret_type, redacted_sample):
    """Trigger automated secret revocation via webhooks"""
    revocation_payload = {
        "event": "secret_leak_detected",
        "secret_type": secret_type,
        "redacted_key": redacted_sample,
        "hypercore_position": current_position,
        "severity": determine_severity(secret_type),
        "recommended_action": get_revocation_action(secret_type),
        "timestamp": datetime.utcnow().isoformat() + "Z"
    }

    webhook_url = REVOCATION_HOOKS.get(secret_type)
    if webhook_url:
        send_webhook(webhook_url, revocation_payload)
        log_revocation_attempt(secret_type, "triggered")
```

## Communication Protocols

### **Alert Format for Security Team**
```json
{
  "alert_id": "shhh_12345",
  "timestamp": "2025-08-02T13:45:00Z",
  "severity": "HIGH",
  "secret_type": "AWS_ACCESS_KEY",
  "source": "hypercore_position_58321",
  "agent_source": "whoosh_orchestrator",
  "redacted_content": "Found AWS key AKIA****XYZ in deployment config",
  "confidence": 0.95,
  "actions_taken": ["quarantined", "revocation_triggered"],
  "next_steps": "Manual verification recommended"
}
```

### **Feedback Processing Format**
```json
{
  "feedback_type": "false_positive|missed_secret|pattern_improvement",
  "alert_id": "shhh_12345",
  "secret_type": "AWS_ACCESS_KEY",
  "evidence": "Key was test data: AKIA-TESTKEY-123",
  "suggested_regex_fix": "AKIA[0-9A-Z]{16}(?!-TESTKEY)",
  "reviewer": "security_admin",
  "timestamp": "2025-08-02T14:00:00Z"
}
```

## Performance Requirements

### **Response Time Targets**
- **Detection Latency**: <50ms per log entry
- **Quarantine Action**: <100ms for high severity
- **Revocation Trigger**: <200ms for webhook dispatch
- **BZZZ Block**: <10ms to prevent propagation

### **Accuracy Standards**
- **False Positive Rate**: <2% for high confidence patterns
- **Detection Coverage**: >99% for known secret formats
- **Pattern Confidence**: Minimum 0.80 for active patterns

## Error Handling & Recovery

### **System Failures**
- **Database Connectivity**: Queue quarantine entries locally and sync when the database recovers (see the sketch after this list)
- **Webhook Failures**: Retry with exponential backoff; alert on continued failure
- **Pattern Loading**: Fall back to core patterns if the external config is unavailable
- **Log Processing**: Never skip entries; flag for manual review if uncertain
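
A minimal sketch of the local-queue fallback for the first item; the JSONL spill file and the `insert_quarantine` call are illustrative, not part of the implementation:

```python
# Minimal sketch: spill quarantine records to disk during a DB outage,
# then replay them on recovery. Helper names are illustrative.
import json
from pathlib import Path

SPILL = Path("quarantine_spill.jsonl")

def store_or_spill(record: dict, db) -> None:
    try:
        db.insert_quarantine(record)       # normal path
    except ConnectionError:
        with SPILL.open("a") as f:         # queue locally
            f.write(json.dumps(record) + "\n")

def resync(db) -> None:
    """Replay spilled records once the database recovers."""
    if not SPILL.exists():
        return
    for line in SPILL.read_text().splitlines():
        db.insert_quarantine(json.loads(line))
    SPILL.unlink()
```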

### **Security Incident Response**
- **Potential Breach**: Immediately escalate to the security team
- **Pattern Bypass**: Alert the security team and request a pattern review
- **False Negative**: Update patterns and retroactively scan recent logs
- **System Compromise**: Quarantine all recent activity pending manual investigation

## Integration Points

### **CHORUS Services Components**
- **Hypercore Log**: Primary data source for monitoring
- **BZZZ Network**: P2P message inspection and blocking capability
- **WHOOSH Orchestrator**: Agent activity monitoring
- **SLURP Context**: Context-aware secret detection
- **Security Dashboard**: Real-time alert display and management

### **External Systems**
- **AWS IAM**: Automated access key revocation
- **GitHub API**: Personal access token deactivation
- **Slack Admin API**: Bot/user token revocation
- **Security SIEM**: Alert forwarding and correlation
- **Audit System**: Compliance logging and reporting

## Continuous Improvement

### **Pattern Learning Process**
1. **Feedback Collection**: Gather curator reports on detection accuracy
2. **Pattern Analysis**: Identify common false positive/negative patterns
3. **Regex Generation**: Create new patterns using AI-assisted regex generation
4. **Confidence Assessment**: Test new patterns against historical data
5. **Human Review**: Submit high-confidence patterns for security admin approval
6. **Production Deployment**: Activate approved patterns with monitoring

### **Meta-Learning Capabilities**
- **Trend Analysis**: Identify emerging secret types and formats
- **Context Learning**: Improve understanding of legitimate vs. malicious patterns
- **Agent Behavior**: Learn which agents commonly handle sensitive data
- **Temporal Patterns**: Understand when secret leaks are most likely to occur

## Success Metrics

### **Security Effectiveness**
- **Zero secret propagation** in BZZZ P2P network post-deployment
- **Mean time to detection**: <1 minute for any secret exposure
- **Revocation success rate**: >95% for automated responses
- **Coverage improvement**: Continuous expansion of detectable secret types

### **Operational Excellence**
- **System uptime**: >99.9% availability for log monitoring
- **Processing throughput**: Handle 10,000+ log entries per minute
- **Alert quality**: <5% false positive rate for security team alerts
- **Response automation**: >90% of secrets handled without human intervention

You are now equipped to serve as the CHORUS Services Secrets Sentinel. Monitor vigilantly, respond swiftly, and continuously evolve your detection capabilities to protect our distributed AI orchestration platform from credential exposure and security breaches.

Remember: **Security is paramount. When in doubt, quarantine and escalate.**
4
modules/shhh/api/__init__.py
Normal file
@@ -0,0 +1,4 @@
# SHHH API Module
"""
FastAPI dashboard and API endpoints for SHHH Secrets Sentinel.
"""
374
modules/shhh/api/main.py
Normal file
@@ -0,0 +1,374 @@
"""
FastAPI Dashboard Backend for SHHH Secrets Sentinel
Provides REST API endpoints for quarantine management and system monitoring.
"""

import asyncio
from datetime import datetime
from typing import List, Optional
from contextlib import asynccontextmanager

from fastapi import FastAPI, HTTPException, Depends, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
import structlog

from .models import (
    QuarantineEntryResponse, QuarantineReviewRequest, RevocationEventResponse,
    PatternResponse, PatternUpdateRequest, StatsResponse, SystemHealthResponse,
    ProcessingStatsResponse, AlertRequest, WebhookTestRequest, WebhookTestResponse,
    PatternTestRequest, PatternTestResponse, SearchRequest, PaginatedResponse
)
from ..core.quarantine import QuarantineManager, QuarantineEntry
from ..core.detector import SecretDetector
from ..automation.revocation import SecretRevoker
from ..pipeline.processor import MessageProcessor

logger = structlog.get_logger()

# Global components (initialized in lifespan)
quarantine_manager: Optional[QuarantineManager] = None
detector: Optional[SecretDetector] = None
revoker: Optional[SecretRevoker] = None
processor: Optional[MessageProcessor] = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan manager"""
    global quarantine_manager, detector, revoker, processor

    try:
        # Initialize components
        logger.info("Initializing SHHH API components")

        # These would normally come from configuration
        config = {
            'database_url': 'postgresql://shhh:password@localhost:5432/shhh_sentinel',
            'patterns_file': 'patterns.yaml',
            'revocation_webhooks': {
                'AWS_ACCESS_KEY': 'https://security.chorus.services/hooks/aws-revoke',
                'GITHUB_TOKEN': 'https://security.chorus.services/hooks/github-revoke',
                'SLACK_TOKEN': 'https://security.chorus.services/hooks/slack-revoke'
            }
        }

        # Initialize quarantine manager
        quarantine_manager = QuarantineManager(config['database_url'])
        await quarantine_manager.initialize()

        # Initialize detector
        detector = SecretDetector(config['patterns_file'])

        # Initialize revoker
        revoker = SecretRevoker(quarantine_manager, config['revocation_webhooks'])

        logger.info("SHHH API components initialized successfully")

        yield

    except Exception as e:
        logger.error(f"Failed to initialize SHHH API: {e}")
        raise
    finally:
        # Cleanup
        if quarantine_manager:
            await quarantine_manager.close()
        logger.info("SHHH API components shut down")


app = FastAPI(
    title="SHHH Secrets Sentinel API",
    description="REST API for managing secrets detection, quarantine, and response",
    version="1.0.0",
    lifespan=lifespan
)

# CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure appropriately for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Dependency functions
async def get_quarantine_manager() -> QuarantineManager:
    if not quarantine_manager:
        raise HTTPException(status_code=503, detail="Quarantine manager not available")
    return quarantine_manager


async def get_detector() -> SecretDetector:
    if not detector:
        raise HTTPException(status_code=503, detail="Secret detector not available")
    return detector


async def get_revoker() -> SecretRevoker:
    if not revoker:
        raise HTTPException(status_code=503, detail="Secret revoker not available")
    return revoker


# Health and status endpoints
@app.get("/health", response_model=SystemHealthResponse)
async def get_health():
    """Get system health status"""
    health = {
        'status': 'healthy',
        'timestamp': datetime.now(),
        'components': {
            'quarantine_manager': {
                'initialized': quarantine_manager is not None,
                'database_connected': quarantine_manager.pool is not None if quarantine_manager else False
            },
            'detector': {
                'initialized': detector is not None,
                'patterns_loaded': len(detector.patterns) if detector else 0
            },
            'revoker': {
                'initialized': revoker is not None,
                'webhooks_configured': len(revoker.webhook_config) if revoker else 0
            }
        }
    }

    return health


@app.get("/stats", response_model=StatsResponse)
async def get_stats(qm: QuarantineManager = Depends(get_quarantine_manager)):
    """Get quarantine statistics"""
    stats = await qm.get_quarantine_stats()
    return stats


# Quarantine management endpoints
@app.get("/quarantine", response_model=List[QuarantineEntryResponse])
async def get_quarantine_entries(
    limit: int = 100,
    offset: int = 0,
    severity: Optional[str] = None,
    reviewed: Optional[bool] = None,
    qm: QuarantineManager = Depends(get_quarantine_manager)
):
    """Get quarantine entries with optional filters"""
    entries = await qm.get_quarantine_entries(
        limit=limit,
        offset=offset,
        severity_filter=severity,
        reviewed_filter=reviewed
    )

    return [QuarantineEntryResponse(**entry.__dict__) for entry in entries]


@app.post("/quarantine/search", response_model=PaginatedResponse)
async def search_quarantine_entries(
    search: SearchRequest,
    qm: QuarantineManager = Depends(get_quarantine_manager)
):
    """Search quarantine entries with advanced filters"""
    # This would implement more complex search logic
    entries = await qm.get_quarantine_entries(
        limit=search.limit,
        offset=search.offset,
        severity_filter=search.severity,
        reviewed_filter=search.reviewed
    )

    items = [QuarantineEntryResponse(**entry.__dict__) for entry in entries]

    return PaginatedResponse(
        items=items,
        total=len(items),  # This would be the actual total from a count query
        limit=search.limit,
        offset=search.offset,
        has_more=len(items) == search.limit
    )


@app.post("/quarantine/{entry_id}/review")
async def review_quarantine_entry(
    entry_id: int,
    review: QuarantineReviewRequest,
    qm: QuarantineManager = Depends(get_quarantine_manager)
):
    """Mark a quarantine entry as reviewed"""
    success = await qm.mark_reviewed(entry_id, review.action, review.reviewer)

    if not success:
        raise HTTPException(status_code=404, detail="Quarantine entry not found")

    return {"status": "success", "message": f"Entry {entry_id} marked as {review.action}"}


@app.get("/quarantine/{entry_id}")
async def get_quarantine_entry(
    entry_id: int,
    qm: QuarantineManager = Depends(get_quarantine_manager)
):
    """Get a specific quarantine entry by ID"""
    # This would need to be implemented in QuarantineManager
    raise HTTPException(status_code=501, detail="Not implemented yet")


# Pattern management endpoints
@app.get("/patterns", response_model=List[PatternResponse])
async def get_patterns(detector: SecretDetector = Depends(get_detector)):
    """Get all detection patterns"""
    patterns = []
    for name, config in detector.patterns.items():
        patterns.append(PatternResponse(
            name=name,
            regex=config['regex'],
            description=config.get('description', ''),
            severity=config.get('severity', 'MEDIUM'),
            confidence=config.get('confidence', 0.8),
            active=config.get('active', True)
        ))

    return patterns


@app.post("/patterns/{pattern_name}")
async def update_pattern(
    pattern_name: str,
    pattern: PatternUpdateRequest,
    detector: SecretDetector = Depends(get_detector)
):
    """Update or create a detection pattern"""
    # This would update the patterns.yaml file
    # For now, just update in memory
    detector.patterns[pattern_name] = {
        'regex': pattern.regex,
        'description': pattern.description,
        'severity': pattern.severity,
        'confidence': pattern.confidence,
        'active': pattern.active
    }

    # Recompile regex
    import re
    try:
        detector.patterns[pattern_name]['compiled_regex'] = re.compile(
            pattern.regex, re.MULTILINE | re.DOTALL
        )
    except re.error as e:
        raise HTTPException(status_code=400, detail=f"Invalid regex: {e}")

    return {"status": "success", "message": f"Pattern {pattern_name} updated"}


@app.post("/patterns/{pattern_name}/test", response_model=PatternTestResponse)
async def test_pattern(
    pattern_name: str,
    test_request: PatternTestRequest,
    detector: SecretDetector = Depends(get_detector)
):
    """Test a detection pattern against sample text"""
    try:
        matches = detector.test_pattern(pattern_name, test_request.test_text)
        return PatternTestResponse(
            matches=[match.__dict__ for match in matches],
            match_count=len(matches)
        )
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))


@app.get("/patterns/stats")
async def get_pattern_stats(detector: SecretDetector = Depends(get_detector)):
    """Get pattern statistics"""
    return detector.get_pattern_stats()


# Revocation management endpoints
@app.get("/revocations", response_model=List[RevocationEventResponse])
async def get_revocations(
    limit: int = 100,
    offset: int = 0,
    qm: QuarantineManager = Depends(get_quarantine_manager)
):
    """Get revocation events"""
    # This would need to be implemented in QuarantineManager
    raise HTTPException(status_code=501, detail="Not implemented yet")


@app.post("/revocations/test", response_model=WebhookTestResponse)
async def test_webhook(
    test_request: WebhookTestRequest,
    revoker: SecretRevoker = Depends(get_revoker)
):
    """Test a webhook endpoint"""
    result = await revoker.test_webhook_endpoint(test_request.secret_type)
    return WebhookTestResponse(**result)


@app.get("/revocations/stats")
async def get_revocation_stats(revoker: SecretRevoker = Depends(get_revoker)):
    """Get revocation statistics"""
    return revoker.get_stats()


# Administrative endpoints
@app.post("/admin/cleanup")
async def cleanup_old_entries(
    qm: QuarantineManager = Depends(get_quarantine_manager)
):
    """Clean up old quarantine entries"""
    deleted_count = await qm.cleanup_old_entries()
    return {"status": "success", "deleted_entries": deleted_count}


@app.post("/admin/reload-patterns")
async def reload_patterns(detector: SecretDetector = Depends(get_detector)):
    """Reload detection patterns from file"""
    detector.load_patterns()
    return {"status": "success", "message": "Patterns reloaded"}


@app.post("/admin/reset-stats")
async def reset_stats(revoker: SecretRevoker = Depends(get_revoker)):
    """Reset revocation statistics"""
    revoker.reset_stats()
    return {"status": "success", "message": "Statistics reset"}


# Monitoring endpoints
@app.get("/metrics/prometheus")
async def get_prometheus_metrics():
    """Get metrics in Prometheus format"""
    # This would generate Prometheus-formatted metrics
    raise HTTPException(status_code=501, detail="Prometheus metrics not implemented yet")


@app.get("/logs/recent")
async def get_recent_logs(limit: int = 100):
    """Get recent system logs"""
    # This would return recent log entries
    raise HTTPException(status_code=501, detail="Log endpoint not implemented yet")


# Error handlers
@app.exception_handler(Exception)
async def general_exception_handler(request, exc):
    logger.error(f"Unhandled exception: {exc}")
    return JSONResponse(
        status_code=500,
        content={"detail": "Internal server error"}
    )


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(
        "api.main:app",
        host="127.0.0.1",
        port=8000,
        reload=True,
        log_level="info"
    )
149
modules/shhh/api/models.py
Normal file
@@ -0,0 +1,149 @@
"""
Pydantic models for SHHH API endpoints.
"""

from datetime import datetime
from typing import List, Dict, Any, Optional
from pydantic import BaseModel, Field


class QuarantineEntryResponse(BaseModel):
    """Response model for quarantine entries"""
    id: int
    timestamp: datetime
    hypercore_position: int
    bzzz_message_id: Optional[str] = None
    secret_type: str
    severity: str
    confidence: float
    redacted_content: str
    content_hash: str
    source_agent: str
    match_count: int
    reviewed: bool
    review_action: Optional[str] = None
    reviewer: Optional[str] = None
    review_timestamp: Optional[datetime] = None
    metadata: Dict[str, Any] = {}


class QuarantineReviewRequest(BaseModel):
    """Request model for reviewing quarantine entries"""
    action: str = Field(..., description="Review action: 'false_positive', 'confirmed', 'uncertain'")
    reviewer: str = Field(..., description="Name or ID of the reviewer")
    notes: Optional[str] = Field(None, description="Optional review notes")


class RevocationEventResponse(BaseModel):
    """Response model for revocation events"""
    id: int
    quarantine_id: int
    secret_type: str
    revocation_method: str
    status: str
    response_data: Dict[str, Any] = {}
    timestamp: datetime


class PatternResponse(BaseModel):
    """Response model for detection patterns"""
    name: str
    regex: str
    description: str
    severity: str
    confidence: float
    active: bool


class PatternUpdateRequest(BaseModel):
    """Request model for updating patterns"""
    regex: str = Field(..., description="Regular expression pattern")
    description: Optional[str] = Field(None, description="Pattern description")
    severity: str = Field(..., description="Severity level: LOW, MEDIUM, HIGH, CRITICAL")
    confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score")
    active: bool = Field(True, description="Whether pattern is active")


class StatsResponse(BaseModel):
    """Response model for system statistics"""
    total_entries: int
    pending_review: int
    critical_count: int
    high_count: int
    medium_count: int
    low_count: int
    last_24h: int
    last_7d: int


class SystemHealthResponse(BaseModel):
    """Response model for system health"""
    status: str
    timestamp: datetime
    components: Dict[str, Dict[str, Any]]


class ProcessingStatsResponse(BaseModel):
    """Response model for processing statistics"""
    entries_processed: int
    secrets_detected: int
    messages_quarantined: int
    revocations_triggered: int
    processing_errors: int
    uptime_hours: Optional[float] = None
    entries_per_second: Optional[float] = None
    secrets_per_hour: Optional[float] = None
    is_running: bool


class AlertRequest(BaseModel):
    """Request model for manual alerts"""
    message: str = Field(..., description="Alert message")
    severity: str = Field(..., description="Alert severity")
    source: str = Field(..., description="Alert source")


class WebhookTestRequest(BaseModel):
    """Request model for testing webhook endpoints"""
    secret_type: str = Field(..., description="Secret type to test")


class WebhookTestResponse(BaseModel):
    """Response model for webhook tests"""
    success: bool
    method: Optional[str] = None
    response_data: Dict[str, Any] = {}
    error: Optional[str] = None


class PatternTestRequest(BaseModel):
    """Request model for testing detection patterns"""
    pattern_name: str = Field(..., description="Name of pattern to test")
    test_text: str = Field(..., description="Text to test against pattern")


class PatternTestResponse(BaseModel):
    """Response model for pattern testing"""
    matches: List[Dict[str, Any]]
    match_count: int


class SearchRequest(BaseModel):
    """Request model for searching quarantine entries"""
    query: Optional[str] = Field(None, description="Search query")
    secret_type: Optional[str] = Field(None, description="Filter by secret type")
    severity: Optional[str] = Field(None, description="Filter by severity")
    reviewed: Optional[bool] = Field(None, description="Filter by review status")
    start_date: Optional[datetime] = Field(None, description="Start date filter")
    end_date: Optional[datetime] = Field(None, description="End date filter")
    limit: int = Field(100, ge=1, le=1000, description="Result limit")
    offset: int = Field(0, ge=0, description="Result offset")


class PaginatedResponse(BaseModel):
    """Generic paginated response model"""
    items: List[Any]
    total: int
    limit: int
    offset: int
    has_more: bool
4
modules/shhh/automation/__init__.py
Normal file
@@ -0,0 +1,4 @@
# SHHH Automation Module
"""
Automated response and revocation systems for secret detection.
"""
474
modules/shhh/automation/revocation.py
Normal file
@@ -0,0 +1,474 @@
"""
Automated Secret Revocation System for SHHH Secrets Sentinel
Provides automated response capabilities for different types of detected secrets.
"""

import asyncio
import aiohttp
import json
import time
from typing import Dict, Any, Optional, List
from dataclasses import dataclass
from datetime import datetime
import structlog

from ..core.quarantine import QuarantineEntry, RevocationEvent, QuarantineManager
from ..core.detector import SecretMatch

logger = structlog.get_logger()


@dataclass
class RevocationRequest:
    """Represents a request to revoke a secret"""
    quarantine_id: int
    secret_type: str
    redacted_secret: str
    urgency: str  # 'immediate', 'high', 'medium', 'low'
    metadata: Dict[str, Any]


@dataclass
class RevocationResponse:
    """Represents the response from a revocation attempt"""
    success: bool
    method: str
    response_data: Dict[str, Any]
    error_message: Optional[str] = None
    revocation_id: Optional[str] = None


class SecretRevoker:
    """
    Automated secret revocation system that integrates with various cloud providers
    and services to automatically disable compromised credentials.
    """

    def __init__(self, quarantine_manager: QuarantineManager, webhook_config: Dict[str, str] = None):
        self.quarantine = quarantine_manager
        self.webhook_config = webhook_config or {}

        # Revocation timeouts and retry settings
        self.request_timeout = 10  # seconds
        self.max_retries = 3
        self.retry_delay = 2  # seconds

        # Statistics
        self.stats = {
            'total_revocations': 0,
            'successful_revocations': 0,
            'failed_revocations': 0,
            'revocations_by_type': {},
            'last_reset': datetime.now()
        }

        logger.info("Initialized SecretRevoker")

    async def trigger_revocation(self, quarantine_entry: QuarantineEntry) -> Optional[RevocationResponse]:
        """Trigger automatic revocation for a quarantined secret"""
        try:
            revocation_request = RevocationRequest(
                quarantine_id=quarantine_entry.id,
                secret_type=quarantine_entry.secret_type,
                redacted_secret=self._extract_redacted_from_metadata(quarantine_entry),
                urgency=self._determine_urgency(quarantine_entry.severity),
                metadata={
                    'source_agent': quarantine_entry.source_agent,
                    'detection_timestamp': quarantine_entry.timestamp.isoformat(),
                    'confidence': quarantine_entry.confidence,
                    'hypercore_position': quarantine_entry.hypercore_position
                }
            )

            # Determine revocation method
            revocation_method = self._get_revocation_method(quarantine_entry.secret_type)
            if not revocation_method:
                logger.warning(f"No revocation method configured for {quarantine_entry.secret_type}")
                return None

            # Attempt revocation
            response = await self._execute_revocation(revocation_request, revocation_method)

            # Record the revocation event
            await self._record_revocation_event(quarantine_entry, response)

            # Update statistics
            self._update_stats(quarantine_entry.secret_type, response.success)

            return response

        except Exception as e:
            logger.error(f"Failed to trigger revocation for quarantine {quarantine_entry.id}: {e}")
            return None

    def _extract_redacted_from_metadata(self, quarantine_entry: QuarantineEntry) -> str:
        """Extract redacted secret from quarantine metadata"""
        try:
            matches = quarantine_entry.metadata.get('matches', [])
            if matches:
                # Get the first match's redacted text
                return matches[0].get('redacted_text', 'REDACTED')
        except Exception:
            pass

        return 'REDACTED'

    def _determine_urgency(self, severity: str) -> str:
        """Determine revocation urgency based on severity"""
        urgency_map = {
            'CRITICAL': 'immediate',
            'HIGH': 'high',
            'MEDIUM': 'medium',
            'LOW': 'low'
        }
        return urgency_map.get(severity, 'medium')

    def _get_revocation_method(self, secret_type: str) -> Optional[str]:
        """Get the appropriate revocation method for a secret type"""
        method_map = {
            'AWS_ACCESS_KEY': 'aws_iam_revocation',
            'AWS_SECRET_KEY': 'aws_iam_revocation',
            'GITHUB_TOKEN': 'github_token_revocation',
            'GITHUB_OAUTH': 'github_token_revocation',
            'SLACK_TOKEN': 'slack_token_revocation',
            'GOOGLE_API_KEY': 'google_api_revocation',
            'DOCKER_TOKEN': 'docker_token_revocation'
        }
        return method_map.get(secret_type)

    async def _execute_revocation(self, request: RevocationRequest, method: str) -> RevocationResponse:
        """Execute the actual revocation based on the method"""
        method_handlers = {
            'aws_iam_revocation': self._revoke_aws_credentials,
            'github_token_revocation': self._revoke_github_token,
            'slack_token_revocation': self._revoke_slack_token,
            'google_api_revocation': self._revoke_google_api_key,
            'docker_token_revocation': self._revoke_docker_token,
            'webhook_revocation': self._revoke_via_webhook
        }

        handler = method_handlers.get(method, self._revoke_via_webhook)

        for attempt in range(self.max_retries):
            try:
                response = await handler(request)
                if response.success:
                    logger.info(
                        f"Successfully revoked {request.secret_type}",
                        quarantine_id=request.quarantine_id,
                        method=method,
                        attempt=attempt + 1
                    )
                    return response

                # Log failure and retry if not successful
                logger.warning(
                    f"Revocation attempt {attempt + 1} failed",
                    quarantine_id=request.quarantine_id,
                    method=method,
                    error=response.error_message
                )

                if attempt < self.max_retries - 1:
                    await asyncio.sleep(self.retry_delay * (2 ** attempt))  # Exponential backoff

            except Exception as e:
                logger.error(f"Revocation attempt {attempt + 1} error: {e}")
                if attempt < self.max_retries - 1:
                    await asyncio.sleep(self.retry_delay * (2 ** attempt))

        # All attempts failed
        return RevocationResponse(
            success=False,
            method=method,
            response_data={},
            error_message=f"All {self.max_retries} revocation attempts failed"
        )

    async def _revoke_aws_credentials(self, request: RevocationRequest) -> RevocationResponse:
        """Revoke AWS credentials via webhook"""
        webhook_url = self.webhook_config.get('AWS_ACCESS_KEY')
        if not webhook_url:
            return RevocationResponse(
                success=False,
                method='aws_iam_revocation',
                response_data={},
                error_message="No AWS revocation webhook configured"
            )

        payload = {
            'event': 'secret_leak_detected',
            'secret_type': request.secret_type,
            'redacted_key': request.redacted_secret,
            'urgency': request.urgency,
            'quarantine_id': request.quarantine_id,
            'timestamp': datetime.now().isoformat(),
            'recommended_action': 'Revoke IAM access key immediately',
            'metadata': request.metadata
        }

        return await self._send_webhook_request(webhook_url, payload, 'aws_iam_revocation')

    async def _revoke_github_token(self, request: RevocationRequest) -> RevocationResponse:
        """Revoke GitHub token via webhook"""
        webhook_url = self.webhook_config.get('GITHUB_TOKEN')
        if not webhook_url:
            return RevocationResponse(
                success=False,
                method='github_token_revocation',
                response_data={},
                error_message="No GitHub revocation webhook configured"
            )

        payload = {
            'event': 'secret_leak_detected',
            'secret_type': request.secret_type,
            'redacted_key': request.redacted_secret,
            'urgency': request.urgency,
            'quarantine_id': request.quarantine_id,
            'timestamp': datetime.now().isoformat(),
            'recommended_action': 'Revoke GitHub token via API or settings',
            'metadata': request.metadata
        }

        return await self._send_webhook_request(webhook_url, payload, 'github_token_revocation')

    async def _revoke_slack_token(self, request: RevocationRequest) -> RevocationResponse:
        """Revoke Slack token via webhook"""
        webhook_url = self.webhook_config.get('SLACK_TOKEN')
        if not webhook_url:
            return RevocationResponse(
                success=False,
                method='slack_token_revocation',
                response_data={},
                error_message="No Slack revocation webhook configured"
            )

        payload = {
            'event': 'secret_leak_detected',
            'secret_type': request.secret_type,
            'redacted_key': request.redacted_secret,
            'urgency': request.urgency,
            'quarantine_id': request.quarantine_id,
            'timestamp': datetime.now().isoformat(),
            'recommended_action': 'Revoke Slack token via Admin API',
            'metadata': request.metadata
        }

        return await self._send_webhook_request(webhook_url, payload, 'slack_token_revocation')

    async def _revoke_google_api_key(self, request: RevocationRequest) -> RevocationResponse:
        """Revoke Google API key via webhook"""
        webhook_url = self.webhook_config.get('GOOGLE_API_KEY')
        if not webhook_url:
            return RevocationResponse(
                success=False,
                method='google_api_revocation',
                response_data={},
                error_message="No Google API revocation webhook configured"
            )

        payload = {
            'event': 'secret_leak_detected',
            'secret_type': request.secret_type,
            'redacted_key': request.redacted_secret,
            'urgency': request.urgency,
            'quarantine_id': request.quarantine_id,
            'timestamp': datetime.now().isoformat(),
            'recommended_action': 'Revoke API key via Google Cloud Console',
            'metadata': request.metadata
        }

        return await self._send_webhook_request(webhook_url, payload, 'google_api_revocation')

    async def _revoke_docker_token(self, request: RevocationRequest) -> RevocationResponse:
        """Revoke Docker token via webhook"""
        webhook_url = self.webhook_config.get('DOCKER_TOKEN')
        if not webhook_url:
            return RevocationResponse(
                success=False,
                method='docker_token_revocation',
                response_data={},
                error_message="No Docker revocation webhook configured"
            )

        payload = {
            'event': 'secret_leak_detected',
            'secret_type': request.secret_type,
            'redacted_key': request.redacted_secret,
            'urgency': request.urgency,
            'quarantine_id': request.quarantine_id,
            'timestamp': datetime.now().isoformat(),
            'recommended_action': 'Revoke Docker token via Hub settings',
            'metadata': request.metadata
        }

        return await self._send_webhook_request(webhook_url, payload, 'docker_token_revocation')

    async def _revoke_via_webhook(self, request: RevocationRequest) -> RevocationResponse:
        """Generic webhook revocation for unknown secret types"""
        # Try to find a generic webhook endpoint
        webhook_url = self.webhook_config.get('GENERIC',
                                              self.webhook_config.get('DEFAULT'))

        if not webhook_url:
            return RevocationResponse(
                success=False,
                method='webhook_revocation',
                response_data={},
                error_message=f"No webhook configured for {request.secret_type}"
            )

        payload = {
            'event': 'secret_leak_detected',
            'secret_type': request.secret_type,
            'redacted_key': request.redacted_secret,
            'urgency': request.urgency,
            'quarantine_id': request.quarantine_id,
            'timestamp': datetime.now().isoformat(),
            'recommended_action': 'Manual review and revocation required',
            'metadata': request.metadata
        }

        return await self._send_webhook_request(webhook_url, payload, 'webhook_revocation')

    async def _send_webhook_request(self, url: str, payload: Dict[str, Any], method: str) -> RevocationResponse:
        """Send webhook request and handle response"""
        try:
            async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=self.request_timeout)) as session:
                async with session.post(url, json=payload) as response:
                    response_data = {}
                    try:
                        response_data = await response.json()
                    except Exception:
                        response_data = {'text': await response.text()}

                    if response.status == 200:
                        return RevocationResponse(
                            success=True,
                            method=method,
                            response_data=response_data,
                            revocation_id=response_data.get('revocation_id')
                        )
                    else:
                        return RevocationResponse(
                            success=False,
                            method=method,
                            response_data=response_data,
                            error_message=f"HTTP {response.status}: {response_data}"
                        )

        except asyncio.TimeoutError:
            return RevocationResponse(
                success=False,
                method=method,
                response_data={},
                error_message=f"Webhook request timed out after {self.request_timeout}s"
            )
        except Exception as e:
            return RevocationResponse(
                success=False,
                method=method,
                response_data={},
                error_message=f"Webhook request failed: {str(e)}"
            )

    async def _record_revocation_event(self, quarantine_entry: QuarantineEntry, response: RevocationResponse):
        """Record revocation event in the database"""
        try:
            revocation_event = RevocationEvent(
                quarantine_id=quarantine_entry.id,
                secret_type=quarantine_entry.secret_type,
                revocation_method=response.method,
                status='success' if response.success else 'failed',
                response_data=response.response_data,
                timestamp=datetime.now()
            )

            await self.quarantine.record_revocation(revocation_event)

        except Exception as e:
            logger.error(f"Failed to record revocation event: {e}")

    def _update_stats(self, secret_type: str, success: bool):
        """Update revocation statistics"""
        self.stats['total_revocations'] += 1

        if success:
            self.stats['successful_revocations'] += 1
        else:
            self.stats['failed_revocations'] += 1

        # Update by-type stats
        if secret_type not in self.stats['revocations_by_type']:
            self.stats['revocations_by_type'][secret_type] = {
                'total': 0,
                'successful': 0,
                'failed': 0
            }

        type_stats = self.stats['revocations_by_type'][secret_type]
        type_stats['total'] += 1

        if success:
            type_stats['successful'] += 1
        else:
            type_stats['failed'] += 1

async def test_webhook_endpoint(self, secret_type: str) -> Dict[str, Any]:
|
||||
"""Test a webhook endpoint with a test payload"""
|
||||
webhook_url = self.webhook_config.get(secret_type)
|
||||
if not webhook_url:
|
||||
return {
|
||||
'success': False,
|
||||
'error': f'No webhook configured for {secret_type}'
|
||||
}
|
||||
|
||||
test_payload = {
|
||||
'event': 'webhook_test',
|
||||
'secret_type': secret_type,
|
||||
'test': True,
|
||||
'timestamp': datetime.now().isoformat()
|
||||
}
|
||||
|
||||
try:
|
||||
response = await self._send_webhook_request(webhook_url, test_payload, 'test')
|
||||
return {
|
||||
'success': response.success,
|
||||
'method': response.method,
|
||||
'response_data': response.response_data,
|
||||
'error': response.error_message
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
'success': False,
|
||||
'error': str(e)
|
||||
}
|
||||
|
||||
def get_stats(self) -> Dict[str, Any]:
|
||||
"""Get revocation statistics"""
|
||||
current_time = datetime.now()
|
||||
uptime_hours = (current_time - self.stats['last_reset']).total_seconds() / 3600
|
||||
|
||||
stats = self.stats.copy()
|
||||
stats.update({
|
||||
'uptime_hours': round(uptime_hours, 2),
|
||||
'success_rate': (
|
||||
self.stats['successful_revocations'] / max(1, self.stats['total_revocations'])
|
||||
) * 100,
|
||||
'configured_webhooks': list(self.webhook_config.keys())
|
||||
})
|
||||
|
||||
return stats
|
||||
|
||||
def reset_stats(self):
|
||||
"""Reset statistics counters"""
|
||||
self.stats = {
|
||||
'total_revocations': 0,
|
||||
'successful_revocations': 0,
|
||||
'failed_revocations': 0,
|
||||
'revocations_by_type': {},
|
||||
'last_reset': datetime.now()
|
||||
}
|
||||
|
||||
logger.info("SecretRevoker statistics reset")
|
||||
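A minimal usage sketch for the revoker above — the wiring is hypothetical (the constructor shape is assumed, not shown in this file), and the endpoint URLs are the hooks configured in patterns.yaml:

```python
import asyncio

# Hypothetical wiring: mirrors the revocation_hooks section of patterns.yaml.
WEBHOOKS = {
    'AWS_ACCESS_KEY': 'https://security.chorus.services/hooks/aws-revoke',
    'GITHUB_TOKEN': 'https://security.chorus.services/hooks/github-revoke',
}

async def smoke_test(revoker) -> None:
    """Probe each configured endpoint with a test payload before going live."""
    for secret_type in WEBHOOKS:
        result = await revoker.test_webhook_endpoint(secret_type)
        print(secret_type, 'ok' if result['success'] else result['error'])
    print(revoker.get_stats())

# asyncio.run(smoke_test(SecretRevoker(webhook_config=WEBHOOKS)))  # constructor shape assumed
```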
33
modules/shhh/config.yaml
Normal file
@@ -0,0 +1,33 @@
# Configuration for the SHHH Secrets Sentinel

# -- File Paths --
# Path to the primary, raw hypercore log to be monitored.
primary_log_path: '/home/tony/AI/projects/chorus.services/modules/shhh/primary.log'

# Path where the sanitized sister hypercore log will be written.
sanitized_log_path: '/home/tony/AI/projects/chorus.services/modules/shhh/sanitized.log'

# Path to the YAML file containing regex patterns for secret detection.
patterns_file: 'patterns.yaml'

# Path to the system prompt file for the LLM agent.
shhh_agent_prompt_file: 'SHHH_SECRETS_SENTINEL_AGENT_PROMPT.md'


# -- Database --
# Connection string for the PostgreSQL database used for quarantining secrets.
# Format: postgresql://user:password@host:port/database
database_url: 'postgresql://shhh:password@localhost:5432/shhh_sentinel'


# -- LLM Analyzer (Ollama) --
# The API endpoint for the Ollama instance.
ollama_endpoint: 'http://localhost:11434/api/generate'

# The name of the model to use from Ollama (e.g., llama3, codellama).
ollama_model: 'llama3'

# The confidence score threshold for regex matches.
# Matches with confidence >= this value will be quarantined immediately, skipping the LLM.
# Matches with confidence < this value will be sent to the LLM for verification.
llm_confidence_threshold: 0.90
6
modules/shhh/core/__init__.py
Normal file
@@ -0,0 +1,6 @@
# SHHH Core Module
"""
Core components for the SHHH Secrets Sentinel system.
"""

__version__ = "1.0.0"
52
modules/shhh/core/detector.py
Normal file
@@ -0,0 +1,52 @@
import re
import yaml
from pathlib import Path


class SecretDetector:
    """
    A simplified secret detection engine using configurable regex patterns.
    It scans text for secrets, redacts them, and provides metadata.
    """
    def __init__(self, patterns_file: str = "patterns.yaml"):
        self.patterns_file = Path(patterns_file)
        self.patterns = self._load_patterns()

    def _load_patterns(self) -> dict:
        """Load detection patterns from YAML configuration."""
        try:
            with open(self.patterns_file, 'r') as f:
                config = yaml.safe_load(f)

            patterns = config.get('patterns', {})
            # Pre-compile regex for efficiency
            for name, props in patterns.items():
                if props.get('active', True):
                    props['compiled_regex'] = re.compile(props['regex'])
            return patterns
        except Exception as e:
            print(f"[ERROR] Failed to load patterns from {self.patterns_file}: {e}")
            return {}

    def scan(self, text: str) -> list[dict]:
        """Scans text and returns a list of found secrets with metadata."""
        matches = []
        for pattern_name, pattern in self.patterns.items():
            if pattern.get('active', True) and 'compiled_regex' in pattern:
                regex_match = pattern['compiled_regex'].search(text)
                if regex_match:
                    matches.append({
                        "secret_type": pattern_name,
                        "value": regex_match.group(0),
                        "confidence": pattern.get("confidence", 0.8),
                        "severity": pattern.get("severity", "MEDIUM")
                    })
        return matches

    def redact(self, text: str, secret_value: str) -> str:
        """Redacts a specific secret value within a string."""
        # Ensure we don't reveal too much for very short secrets
        if len(secret_value) < 8:
            return text.replace(secret_value, "[REDACTED]")

        redacted_str = secret_value[:4] + "****" + secret_value[-4:]
        return text.replace(secret_value, f"[REDACTED:{redacted_str}]")
35
modules/shhh/core/hypercore_reader.py
Normal file
@@ -0,0 +1,35 @@
import asyncio
from datetime import datetime


class LogEntry:
    """A mock log entry object for testing purposes."""
    def __init__(self, content):
        self.content = content
        self.timestamp = datetime.now()
        # Add other fields as needed to match the processor's expectations
        self.source_agent = "mock_agent"
        self.message_type = "mock_message"
        self.metadata = {}
        self.is_bzzz_message = False
        self.bzzz_message_id = None


class HypercoreReader:
    """
    A simplified, mock HypercoreReader that reads from a plain text file
    to simulate a stream of log entries for testing.
    """
    def __init__(self, log_path: str, **kwargs):
        self.log_path = log_path

    async def stream_entries(self):
        """
        An async generator that yields log entries from a text file.
        """
        try:
            with open(self.log_path, 'r') as f:
                for line in f:
                    yield LogEntry(line.strip())
                    await asyncio.sleep(0.01)  # Simulate async behavior
        except FileNotFoundError:
            print(f"[ERROR] Hypercore log file not found at: {self.log_path}")
            return
44
modules/shhh/core/llm_analyzer.py
Normal file
@@ -0,0 +1,44 @@
import requests
import json


class LLMAnalyzer:
    """Analyzes text for secrets using a local LLM via Ollama."""

    def __init__(self, endpoint: str, model: str, system_prompt: str):
        self.endpoint = endpoint
        self.model = model
        self.system_prompt = system_prompt

    def analyze(self, text: str) -> dict:
        """
        Sends text to the Ollama API for analysis and returns a structured JSON response.

        Returns:
            A dictionary like:
            {
                "secret_found": bool,
                "secret_type": str,
                "confidence_score": float,
                "severity": str
            }
            Returns a default "not found" response on error.
        """
        prompt = f"Log entry: \"{text}\"\n\nAnalyze this for secrets and respond with only the required JSON."
        payload = {
            "model": self.model,
            "system": self.system_prompt,
            "prompt": prompt,
            "format": "json",
            "stream": False
        }
        try:
            response = requests.post(self.endpoint, json=payload, timeout=15)
            response.raise_for_status()
            # The response from Ollama is a JSON string, which needs to be parsed.
            analysis = json.loads(response.json().get("response", "{}"))
            return analysis
        except (requests.exceptions.RequestException, json.JSONDecodeError) as e:
            print(f"[ERROR] LLMAnalyzer failed: {e}")
            # Fallback: If the LLM fails, assume no secret was found to avoid blocking the pipeline.
            return {"secret_found": False}
22
modules/shhh/core/quarantine.py
Normal file
@@ -0,0 +1,22 @@
from datetime import datetime


class QuarantineManager:
    """
    A simplified, mock QuarantineManager for testing purposes.
    It prints quarantined messages to the console instead of saving to a database.
    """
    def __init__(self, database_url: str, **kwargs):
        print(f"[MockQuarantine] Initialized with db_url: {database_url}")

    def quarantine_message(self, message, secret_type: str, severity: str, redacted_content: str):
        """
        Prints a quarantined message to the console.
        """
        print("\n--- QUARANTINE ALERT ---")
        print(f"Timestamp: {datetime.now().isoformat()}")
        print(f"Severity: {severity}")
        print(f"Secret Type: {secret_type}")
        print(f"Original Content (from mock): {message.content}")
        print(f"Redacted Content: {redacted_content}")
        print("------------------------\n")
16
modules/shhh/core/sanitized_writer.py
Normal file
@@ -0,0 +1,16 @@
class SanitizedWriter:
    """Writes log entries to the sanitized sister hypercore log."""

    def __init__(self, sanitized_log_path: str):
        self.log_path = sanitized_log_path
        # Placeholder for hypercore writing logic. For now, we'll append to a file.
        self.log_file = open(self.log_path, "a")

    def write(self, log_entry: str):
        """Writes a single log entry to the sanitized stream."""
        self.log_file.write(log_entry + "\n")
        self.log_file.flush()

    def close(self):
        self.log_file.close()
4
modules/shhh/integrations/__init__.py
Normal file
@@ -0,0 +1,4 @@
# SHHH Integrations Module
"""
Integration components for BZZZ network and external systems.
"""
369
modules/shhh/integrations/bzzz_interceptor.py
Normal file
@@ -0,0 +1,369 @@
"""
BZZZ Message Interceptor for SHHH Secrets Sentinel
Intercepts and validates BZZZ P2P messages before network propagation.
"""

import asyncio
import json
import time
from typing import Dict, Any, Optional, Set, Callable
from dataclasses import dataclass
from datetime import datetime, timedelta

import structlog

from ..core.hypercore_reader import BzzzMessage
from ..core.detector import SecretDetector, DetectionResult
from ..core.quarantine import QuarantineManager

logger = structlog.get_logger()


@dataclass
class BlockedMessage:
    """Represents a blocked BZZZ message"""
    message_id: str
    sender_agent: str
    block_reason: str
    secret_types: list
    timestamp: datetime
    quarantine_id: Optional[int] = None


class BzzzInterceptor:
    """
    Intercepts BZZZ P2P messages before transmission to prevent secret leakage.
    Integrates with the BZZZ network layer to scan messages in real-time.
    """

    def __init__(
        self,
        detector: SecretDetector,
        quarantine_manager: QuarantineManager,
        bzzz_config: Optional[Dict[str, Any]] = None
    ):
        self.detector = detector
        self.quarantine = quarantine_manager
        self.bzzz_config = bzzz_config or {}

        # Message blocking state
        self.blocked_messages: Dict[str, BlockedMessage] = {}
        self.message_hooks: Set[Callable] = set()
        self.is_active = False

        # Statistics
        self.stats = {
            'total_scanned': 0,
            'secrets_detected': 0,
            'messages_blocked': 0,
            'false_positives': 0,
            'last_reset': datetime.now()
        }

        logger.info("Initialized BzzzInterceptor")

    async def start(self):
        """Start the BZZZ message interception service"""
        self.is_active = True
        logger.info("BZZZ Interceptor started - all outgoing messages will be scanned")

    async def stop(self):
        """Stop the BZZZ message interception service"""
        self.is_active = False
        logger.info("BZZZ Interceptor stopped")

    def install_message_hook(self, hook_function: Callable):
        """Install a message hook for BZZZ network integration"""
        self.message_hooks.add(hook_function)
        logger.info(f"Installed BZZZ message hook: {hook_function.__name__}")

    def remove_message_hook(self, hook_function: Callable):
        """Remove a message hook"""
        self.message_hooks.discard(hook_function)
        logger.info(f"Removed BZZZ message hook: {hook_function.__name__}")

    async def intercept_outgoing_message(self, message: BzzzMessage) -> bool:
        """
        Intercept and scan an outgoing BZZZ message.
        Returns True if message should be allowed, False if blocked.
        """
        if not self.is_active:
            return True  # Pass through if interceptor is inactive

        start_time = time.time()
        self.stats['total_scanned'] += 1

        try:
            # Scan message for secrets
            detection_result = self.detector.scan_bzzz_message(message)

            if detection_result.has_secrets:
                await self._handle_secret_detection(message, detection_result)
                return False  # Block message

            # Message is clean, allow transmission
            processing_time = (time.time() - start_time) * 1000
            logger.debug(
                "BZZZ message scanned clean",
                message_id=message.message_id,
                sender=message.sender_agent,
                processing_time_ms=processing_time
            )

            return True

        except Exception as e:
            logger.error(f"Error intercepting BZZZ message: {e}")
            # On error, default to blocking for security
            await self._block_message_on_error(message, str(e))
            return False

    async def _handle_secret_detection(self, message: BzzzMessage, detection_result: DetectionResult):
        """Handle detection of secrets in a BZZZ message"""
        self.stats['secrets_detected'] += 1
        self.stats['messages_blocked'] += 1

        # Extract secret types for blocking record
        secret_types = [match.secret_type for match in detection_result.matches]

        # Quarantine the detection result
        quarantine_entry = await self.quarantine.quarantine_detection(detection_result)

        # Create blocked message record
        blocked_msg = BlockedMessage(
            message_id=message.message_id,
            sender_agent=message.sender_agent,
            block_reason=f"Secrets detected: {', '.join(secret_types)}",
            secret_types=secret_types,
            timestamp=datetime.now(),
            quarantine_id=quarantine_entry.id
        )

        self.blocked_messages[message.message_id] = blocked_msg

        # Notify BZZZ network layer
        await self._notify_message_blocked(message, blocked_msg)

        logger.critical(
            "BLOCKED BZZZ message containing secrets",
            message_id=message.message_id,
            sender=message.sender_agent,
            recipient=message.recipient_agent,
            secret_types=secret_types,
            severity=detection_result.max_severity,
            quarantine_id=quarantine_entry.id
        )

    async def _block_message_on_error(self, message: BzzzMessage, error_msg: str):
        """Block a message due to processing error"""
        self.stats['messages_blocked'] += 1

        blocked_msg = BlockedMessage(
            message_id=message.message_id,
            sender_agent=message.sender_agent,
            block_reason=f"Processing error: {error_msg}",
            secret_types=[],
            timestamp=datetime.now()
        )

        self.blocked_messages[message.message_id] = blocked_msg
        await self._notify_message_blocked(message, blocked_msg)

        logger.error(
            "BLOCKED BZZZ message due to error",
            message_id=message.message_id,
            sender=message.sender_agent,
            error=error_msg
        )

    async def _notify_message_blocked(self, message: BzzzMessage, blocked_msg: BlockedMessage):
        """Notify BZZZ network and sender about blocked message"""
        notification = {
            'event': 'message_blocked',
            'message_id': message.message_id,
            'sender_agent': message.sender_agent,
            'recipient_agent': message.recipient_agent,
            'block_reason': blocked_msg.block_reason,
            'secret_types': blocked_msg.secret_types,
            'timestamp': blocked_msg.timestamp.isoformat(),
            'quarantine_id': blocked_msg.quarantine_id
        }

        # Notify all registered hooks
        for hook in self.message_hooks:
            try:
                await self._call_hook_safely(hook, 'message_blocked', notification)
            except Exception as e:
                logger.warning(f"Hook {hook.__name__} failed: {e}")

        # Send notification back to sender agent
        await self._notify_sender_agent(message.sender_agent, notification)

    async def _call_hook_safely(self, hook: Callable, event_type: str, data: Dict[str, Any]):
        """Safely call a hook function with error handling"""
        try:
            if asyncio.iscoroutinefunction(hook):
                await hook(event_type, data)
            else:
                hook(event_type, data)
        except Exception as e:
            logger.warning(f"Hook {hook.__name__} failed: {e}")

    async def _notify_sender_agent(self, sender_agent: str, notification: Dict[str, Any]):
        """Send notification to the sender agent about blocked message"""
        try:
            # This would integrate with the BZZZ network's agent communication system
            # For now, we'll log the notification
            logger.info(
                "Notifying agent about blocked message",
                agent=sender_agent,
                message_id=notification['message_id'],
                reason=notification['block_reason']
            )

            # TODO: Implement actual agent notification via BZZZ network
            # This might involve:
            # - Sending a system message back to the agent
            # - Updating agent's message status
            # - Triggering agent's error handling workflow

        except Exception as e:
            logger.error(f"Failed to notify sender agent {sender_agent}: {e}")

    def is_message_blocked(self, message_id: str) -> Optional[BlockedMessage]:
        """Check if a message is blocked"""
        return self.blocked_messages.get(message_id)

    def unblock_message(self, message_id: str, reviewer: str, reason: str) -> bool:
        """Unblock a previously blocked message (for false positives)"""
        if message_id not in self.blocked_messages:
            return False

        blocked_msg = self.blocked_messages[message_id]

        # Mark as false positive in stats
        self.stats['false_positives'] += 1

        # Remove from blocked messages
        del self.blocked_messages[message_id]

        logger.info(
            "Unblocked BZZZ message",
            message_id=message_id,
            reviewer=reviewer,
            reason=reason,
            original_block_reason=blocked_msg.block_reason
        )

        return True

    def get_blocked_messages(self, limit: int = 100) -> list[BlockedMessage]:
        """Get list of recently blocked messages"""
        blocked_list = list(self.blocked_messages.values())
        blocked_list.sort(key=lambda x: x.timestamp, reverse=True)
        return blocked_list[:limit]

    def get_stats(self) -> Dict[str, Any]:
        """Get interceptor statistics"""
        current_time = datetime.now()
        uptime_hours = (current_time - self.stats['last_reset']).total_seconds() / 3600

        stats = self.stats.copy()
        stats.update({
            'uptime_hours': round(uptime_hours, 2),
            'is_active': self.is_active,
            'blocked_messages_count': len(self.blocked_messages),
            'detection_rate': (
                self.stats['secrets_detected'] / max(1, self.stats['total_scanned'])
            ) * 100,
            'false_positive_rate': (
                self.stats['false_positives'] / max(1, self.stats['secrets_detected'])
            ) * 100 if self.stats['secrets_detected'] > 0 else 0
        })

        return stats

    def reset_stats(self):
        """Reset statistics counters"""
        self.stats = {
            'total_scanned': 0,
            'secrets_detected': 0,
            'messages_blocked': 0,
            'false_positives': 0,
            'last_reset': datetime.now()
        }

        logger.info("BZZZ Interceptor statistics reset")

    async def cleanup_old_blocked_messages(self, hours: int = 24):
        """Clean up old blocked message records"""
        cutoff_time = datetime.now() - timedelta(hours=hours)

        old_messages = [
            msg_id for msg_id, blocked_msg in self.blocked_messages.items()
            if blocked_msg.timestamp < cutoff_time
        ]

        for msg_id in old_messages:
            del self.blocked_messages[msg_id]

        if old_messages:
            logger.info(f"Cleaned up {len(old_messages)} old blocked message records")

        return len(old_messages)


class BzzzNetworkAdapter:
    """
    Adapter to integrate BzzzInterceptor with the actual BZZZ network layer.
    This would be customized based on the BZZZ implementation details.
    """

    def __init__(self, interceptor: BzzzInterceptor):
        self.interceptor = interceptor
        self.original_send_function = None

    def install_interceptor(self, bzzz_network_instance):
        """Install interceptor into BZZZ network layer"""
        # This would need to be customized based on actual BZZZ implementation
        # Example pattern:

        # Store original send function
        self.original_send_function = bzzz_network_instance.send_message

        # Replace with intercepting version
        bzzz_network_instance.send_message = self._intercepting_send_message

        logger.info("BzzzInterceptor installed into BZZZ network layer")

    async def _intercepting_send_message(self, message_data: Dict[str, Any]):
        """Intercepting version of BZZZ send_message function"""
        try:
            # Convert to BzzzMessage format
            bzzz_message = self._convert_to_bzzz_message(message_data)

            # Check with interceptor
            should_allow = await self.interceptor.intercept_outgoing_message(bzzz_message)

            if should_allow:
                # Call original send function
                return await self.original_send_function(message_data)
            else:
                # Message was blocked
                raise Exception(f"Message blocked by security interceptor: {bzzz_message.message_id}")

        except Exception as e:
            logger.error(f"Error in intercepting send: {e}")
            raise

    def _convert_to_bzzz_message(self, message_data: Dict[str, Any]) -> BzzzMessage:
        """Convert BZZZ network message format to BzzzMessage"""
        # This would need to be customized based on actual BZZZ message format
        return BzzzMessage(
            message_id=message_data.get('id', f"auto_{int(time.time())}"),
            sender_agent=message_data.get('sender', 'unknown'),
            recipient_agent=message_data.get('recipient'),
            message_type=message_data.get('type', 'unknown'),
            payload=json.dumps(message_data.get('payload', message_data)),
            timestamp=datetime.now(),
            network_metadata=message_data
        )
181
modules/shhh/main.py
Normal file
@@ -0,0 +1,181 @@
#!/usr/bin/env python3
"""
SHHH Secrets Sentinel - Main Entry Point
Production-ready secrets detection and monitoring system for CHORUS Services.
"""

import asyncio
import argparse
import sys
import yaml
from pathlib import Path
import structlog
from typing import Dict, Any

# Updated imports to bring in the new and modified components
from pipeline.processor import MessageProcessor
from core.hypercore_reader import HypercoreReader
from core.detector import SecretDetector
from core.llm_analyzer import LLMAnalyzer
from core.quarantine import QuarantineManager
from core.sanitized_writer import SanitizedWriter


def setup_logging(log_level: str = "INFO", structured: bool = True):
    """Configure structured logging"""
    structlog.configure(
        processors=[
            structlog.stdlib.filter_by_level,
            structlog.stdlib.add_logger_name,
            structlog.stdlib.add_log_level,
            structlog.stdlib.PositionalArgumentsFormatter(),
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.processors.StackInfoRenderer(),
            structlog.processors.format_exc_info,
            structlog.processors.UnicodeDecoder(),
            structlog.processors.JSONRenderer() if structured else structlog.dev.ConsoleRenderer(),
        ],
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
        wrapper_class=structlog.stdlib.BoundLogger,
        cache_logger_on_first_use=True,
    )


def load_config(config_path: str) -> Dict[str, Any]:
    """Load configuration from YAML file"""
    try:
        with open(config_path, 'r') as f:
            config = yaml.safe_load(f)
        return config
    except FileNotFoundError:
        print(f"Configuration file not found: {config_path}, using defaults.")
        return get_default_config()
    except yaml.YAMLError as e:
        print(f"Error parsing configuration file: {e}")
        sys.exit(1)


def get_default_config() -> Dict[str, Any]:
    """Get default configuration, updated for the new architecture."""
    return {
        'primary_log_path': '/path/to/primary/hypercore.log',
        'sanitized_log_path': '/path/to/sanitized/hypercore.log',
        'database_url': 'postgresql://shhh:password@localhost:5432/shhh_sentinel',
        'patterns_file': 'patterns.yaml',
        'ollama_endpoint': 'http://localhost:11434/api/generate',
        'ollama_model': 'llama3',
        'llm_confidence_threshold': 0.90,
        'shhh_agent_prompt_file': 'SHHH_SECRETS_SENTINEL_AGENT_PROMPT.md'
    }


async def run_monitor_mode(config: Dict[str, Any]):
    """Run in monitoring mode with the new hybrid pipeline."""
    logger = structlog.get_logger()
    logger.info("Starting SHHH in monitor mode with hybrid pipeline...")

    writer = None
    try:
        # 1. Load System Prompt for LLM
        try:
            with open(config['shhh_agent_prompt_file'], "r") as f:
                ollama_system_prompt = f.read()
        except FileNotFoundError:
            logger.error(f"LLM prompt file not found at {config['shhh_agent_prompt_file']}. Aborting.")
            return

        # 2. Instantiation of components
        # Note: HypercoreReader and QuarantineManager might need async initialization
        # which is not shown here for simplicity, following the plan.
        reader = HypercoreReader(config['primary_log_path'])
        detector = SecretDetector(config['patterns_file'])
        llm_analyzer = LLMAnalyzer(config['ollama_endpoint'], config['ollama_model'], ollama_system_prompt)
        quarantine = QuarantineManager(config['database_url'])
        writer = SanitizedWriter(config['sanitized_log_path'])

        processor = MessageProcessor(
            reader=reader,
            detector=detector,
            llm_analyzer=llm_analyzer,
            quarantine=quarantine,
            writer=writer,
            llm_threshold=config['llm_confidence_threshold']
        )

        # 3. Execution
        logger.info("Starting processor stream...")
        await processor.process_stream()

    except Exception as e:
        logger.error("An error occurred during monitor mode execution.", error=str(e))
    finally:
        if writer:
            writer.close()
        logger.info("Monitor mode shutdown complete.")


async def run_api_mode(config: Dict[str, Any], host: str, port: int):
    """Run in API mode (dashboard server) - UNCHANGED"""
    import uvicorn
    from api.main import app
    app.state.config = config
    uvicorn_config = uvicorn.Config(app=app, host=host, port=port, log_level="info", access_log=True)
    server = uvicorn.Server(uvicorn_config)
    await server.serve()


async def run_test_mode(config: Dict[str, Any], test_file: str):
    """Run in test mode with sample data - UNCHANGED but may be broken."""
    logger = structlog.get_logger()
    logger.warning("Test mode may be broken due to recent refactoring.")
    # This part of the code would need to be updated to work with the new SecretDetector.
    # For now, it remains as it was.
    from core.detector import SecretDetector
    from datetime import datetime

    detector = SecretDetector(config['patterns_file'])
    logger.info("Running SHHH in test mode")
    # ... (rest of the test mode logic is likely broken and needs updating)


def main():
    """Main entry point"""
    parser = argparse.ArgumentParser(description="SHHH Secrets Sentinel")

    parser.add_argument('--config', '-c', default='config.yaml', help='Configuration file path')
    parser.add_argument('--mode', '-m', choices=['monitor', 'api', 'test'], default='monitor', help='Operation mode')
    parser.add_argument('--log-level', '-l', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], default='INFO', help='Log level')
    parser.add_argument('--structured-logs', action='store_true', help='Use structured JSON logging')
    parser.add_argument('--host', default='127.0.0.1', help='API server host')
    parser.add_argument('--port', '-p', type=int, default=8000, help='API server port')
    parser.add_argument('--test-file', help='Test data file for test mode')
    parser.add_argument('--version', '-v', action='version', version='SHHH Secrets Sentinel 1.1.0 (Hybrid)')

    args = parser.parse_args()

    setup_logging(args.log_level, args.structured_logs)
    logger = structlog.get_logger()

    config = load_config(args.config)

    logger.info("Starting SHHH Secrets Sentinel", mode=args.mode, config_file=args.config)

    try:
        if args.mode == 'monitor':
            asyncio.run(run_monitor_mode(config))
        elif args.mode == 'api':
            asyncio.run(run_api_mode(config, args.host, args.port))
        elif args.mode == 'test':
            asyncio.run(run_test_mode(config, args.test_file))
    except KeyboardInterrupt:
        logger.info("Shutting down due to keyboard interrupt.")
    except Exception as e:
        logger.error("Application failed", error=str(e))
        sys.exit(1)

    logger.info("SHHH Secrets Sentinel stopped.")


if __name__ == '__main__':
    main()
121
modules/shhh/patterns.yaml
Normal file
@@ -0,0 +1,121 @@
# SHHH Secrets Detection Patterns
# Configuration for the Secrets Sentinel monitoring system

patterns:
  AWS_ACCESS_KEY:
    regex: "AKIA[0-9A-Z]{16}"
    severity: "CRITICAL"
    confidence: 0.95
    active: true
    description: "AWS Access Key ID"
    remediation: "Revoke via AWS IAM immediately"

  AWS_SECRET_KEY:
    regex: "[A-Za-z0-9/+=]{40}"
    severity: "CRITICAL"
    confidence: 0.85
    active: true
    description: "AWS Secret Access Key"
    remediation: "Revoke via AWS IAM immediately"
    context_required: true  # Requires context analysis

  PRIVATE_KEY:
    regex: "-----BEGIN [A-Z ]*PRIVATE KEY-----"
    severity: "CRITICAL"
    confidence: 0.98
    active: true
    description: "Private Key (RSA, SSH, etc.)"
    remediation: "Rotate key immediately"

  GITHUB_TOKEN:
    regex: "ghp_[0-9A-Za-z]{36}"
    severity: "HIGH"
    confidence: 0.92
    active: true
    description: "GitHub Personal Access Token"
    remediation: "Revoke via GitHub settings"

  GITHUB_OAUTH:
    regex: "gho_[0-9A-Za-z]{36}"
    severity: "HIGH"
    confidence: 0.92
    active: true
    description: "GitHub OAuth Token"
    remediation: "Revoke via GitHub app settings"

  SLACK_TOKEN:
    regex: "xox[baprs]-[0-9A-Za-z-]{10,48}"
    severity: "HIGH"
    confidence: 0.90
    active: true
    description: "Slack Bot/User Token"
    remediation: "Revoke via Slack Admin API"

  JWT_TOKEN:
    regex: "eyJ[A-Za-z0-9_-]+?\\.[A-Za-z0-9_-]+?\\.[A-Za-z0-9_-]+?"
    severity: "MEDIUM"
    confidence: 0.85
    active: true
    description: "JSON Web Token"
    remediation: "Invalidate token and rotate signing key"

  GOOGLE_API_KEY:
    regex: "AIza[0-9A-Za-z\\-_]{35}"
    severity: "HIGH"
    confidence: 0.90
    active: true
    description: "Google API Key"
    remediation: "Revoke via Google Cloud Console"

  DOCKER_TOKEN:
    regex: "dckr_pat_[a-zA-Z0-9_-]{32,}"
    severity: "MEDIUM"
    confidence: 0.88
    active: true
    description: "Docker Personal Access Token"
    remediation: "Revoke via Docker Hub settings"

  GENERIC_API_KEY:
    regex: "[Aa][Pp][Ii]_?[Kk][Ee][Yy].*['\"][0-9a-zA-Z]{32,}['\"]"
    severity: "MEDIUM"
    confidence: 0.70
    active: true
    description: "Generic API Key Pattern"
    remediation: "Verify and revoke if legitimate"

# Pattern exceptions - known test/dummy values to ignore
exceptions:
  test_patterns:
    - "AKIA-TESTKEY-123"
    - "AKIAIOSFODNN7EXAMPLE"
    - "xoxb-test-token"
    - "ghp_test123456789012345678901234567890"
    - "-----BEGIN EXAMPLE PRIVATE KEY-----"

  development_indicators:
    - "test"
    - "example"
    - "demo"
    - "mock"
    - "fake"
    - "dummy"

# Quarantine settings
quarantine:
  high_severity_auto_quarantine: true
  medium_severity_review_required: true
  retention_days: 90
  max_entries: 10000

# Alert settings
alerts:
  webhook_timeout_seconds: 5
  retry_attempts: 3
  retry_delay_seconds: 2

# Revocation hooks
revocation_hooks:
  AWS_ACCESS_KEY: "https://security.chorus.services/hooks/aws-revoke"
  GITHUB_TOKEN: "https://security.chorus.services/hooks/github-revoke"
  SLACK_TOKEN: "https://security.chorus.services/hooks/slack-revoke"
  GOOGLE_API_KEY: "https://security.chorus.services/hooks/google-revoke"
4
modules/shhh/pipeline/__init__.py
Normal file
@@ -0,0 +1,4 @@
# SHHH Pipeline Module
"""
Main processing pipeline for the SHHH Secrets Sentinel system.
"""
66
modules/shhh/pipeline/processor.py
Normal file
@@ -0,0 +1,66 @@
import asyncio

from core.hypercore_reader import HypercoreReader
from core.detector import SecretDetector
from core.llm_analyzer import LLMAnalyzer
from core.quarantine import QuarantineManager
from core.sanitized_writer import SanitizedWriter


class MessageProcessor:
    def __init__(self, reader: HypercoreReader, detector: SecretDetector, llm_analyzer: LLMAnalyzer, quarantine: QuarantineManager, writer: SanitizedWriter, llm_threshold: float):
        self.reader = reader
        self.detector = detector
        self.llm_analyzer = llm_analyzer
        self.quarantine = quarantine
        self.writer = writer
        self.llm_threshold = llm_threshold  # e.g., 0.90

    async def process_stream(self):
        """Main processing loop for the hybrid detection model."""
        async for entry in self.reader.stream_entries():
            # Stage 1: Fast Regex Scan
            regex_matches = self.detector.scan(entry.content)

            if not regex_matches:
                # No secrets found, write original entry to sanitized log
                self.writer.write(entry.content)
                continue

            # A potential secret was found. Default to sanitized, but may be quarantined.
            sanitized_content = entry.content
            should_quarantine = False
            confirmed_secret = None

            for match in regex_matches:
                # High-confidence regex matches trigger immediate quarantine, skipping the LLM.
                if match['confidence'] >= self.llm_threshold:
                    should_quarantine = True
                    confirmed_secret = match
                    break  # One high-confidence match is enough

                # Stage 2: Low-confidence matches go to the LLM for verification.
                llm_result = self.llm_analyzer.analyze(entry.content)
                if llm_result.get("secret_found"):
                    should_quarantine = True
                    # Prefer the LLM's classification but use the regex value for redaction
                    confirmed_secret = {
                        "secret_type": llm_result.get("secret_type", match['secret_type']),
                        "value": match['value'],
                        "severity": llm_result.get("severity", match['severity'])
                    }
                    break

            if should_quarantine and confirmed_secret:
                # A secret is confirmed. Redact, quarantine, and alert.
                sanitized_content = self.detector.redact(entry.content, confirmed_secret['value'])

                self.quarantine.quarantine_message(
                    message=entry,
                    secret_type=confirmed_secret['secret_type'],
                    severity=confirmed_secret['severity'],
                    redacted_content=sanitized_content
                )
                # Potentially trigger alerts here as well
                print(f"[ALERT] Confirmed secret {confirmed_secret['secret_type']} found and quarantined.")

            # Write the (potentially redacted) content to the sanitized log
            self.writer.write(sanitized_content)
15
modules/shhh/requirements.txt
Normal file
@@ -0,0 +1,15 @@
# SHHH Secrets Sentinel Dependencies
fastapi==0.104.1
uvicorn[standard]==0.24.0
psycopg2-binary==2.9.9
pydantic==2.5.0
requests==2.31.0
pyyaml==6.0.1
redis==5.0.1
asyncio-mqtt==0.15.1
watchdog==3.0.0
prometheus-client==0.19.0
python-multipart==0.0.6
aiofiles==23.2.1
hypercorn==0.15.0
structlog==23.2.0
995
modules/shhh/secrets-sentinel.md
Normal file
@@ -0,0 +1,995 @@

Here’s a **clean, production-ready system prompt** for that agent:

---

**🛡️ System Prompt – “Secrets Sentinel” Agent**

> **Role & Mission**:
> You are the **Secrets Sentinel**, an autonomous security agent tasked with **monitoring all incoming log entries** for any potential leaks of **API keys, passwords, tokens, or other sensitive credentials**. Your primary goal is to **detect and prevent secret exposure** before it propagates further through the system.
>
> **Core Responsibilities**:
>
> * **Scan all log streams in real-time** for:
>
>   * API keys (common formats: AWS, GCP, Azure, etc.)
>   * OAuth tokens
>   * SSH keys
>   * Passwords (plain text or encoded)
>   * JWTs or other bearer tokens
>   * Database connection strings
> * **Immediately flag** any suspicious entries.
> * **Classify severity** (e.g., HIGH – AWS root key; MEDIUM – temporary token).
> * **Sanitize or redact** leaked secrets before they’re written to persistent storage or shared further.
> * **Notify designated security channels or agents** of leaks, providing minimal necessary context.
>
> **Guidelines**:
>
> * Never expose the full secret in your alerts — redact most of it (e.g., `AKIA************XYZ`).
> * Be cautious of **false positives** (e.g., test data, dummy keys); err on the side of safety but include a “confidence score.”
> * Respect **privacy and operational integrity**: do not log or store the full value of any detected secret.
> * Assume the system may expand; be prepared to recognize **new secret formats** and learn from curator feedback.
>
> **Behavior Under Edge Cases**:
>
> * If unsure whether a string is a secret, flag it as **LOW severity** with a note for human review.
> * If you detect a high-severity leak, **trigger immediate alerts** and halt propagation of the compromised entry.
>
> **Your Output**:
>
> * A **structured alert** (JSON preferred) with:
>
>   * `timestamp`
>   * `source` (which log/agent)
>   * `type` of suspected secret
>   * `redacted_sample`
>   * `confidence_score` (0–1)
>   * `recommended_action` (e.g., “revoke key,” “rotate password,” “ignore dummy”)
>
> **Tone & Style**:
>
> * Precise, neutral, security-minded.
> * Avoid speculation beyond what you can confidently identify.

---

## 📂 **Version-Controlled `patterns.yaml` Format**

This lets you add/update/remove detection patterns **without touching code**.

```yaml
version: 1.2
last_updated: 2025-08-02

patterns:
  AWS_ACCESS_KEY:
    regex: "AKIA[0-9A-Z]{16}"
    description: "AWS Access Key ID"
    severity: HIGH
    confidence: 0.99
    active: true

  AWS_SECRET_KEY:
    regex: "(?i)aws(.{0,20})?(?-i)['\"][0-9a-zA-Z\/+]{40}['\"]"
    description: "AWS Secret Key"
    severity: HIGH
    confidence: 0.99
    active: true

  GITHUB_TOKEN:
    regex: "gh[pousr]_[0-9A-Za-z]{36}"
    description: "GitHub Personal Access Token"
    severity: HIGH
    confidence: 0.97
    active: true

  JWT:
    regex: "eyJ[A-Za-z0-9_-]+?\\.[A-Za-z0-9._-]+?\\.[A-Za-z0-9._-]+"
    description: "JSON Web Token"
    severity: MEDIUM
    confidence: 0.95
    active: true

meta:
  allow_feedback_learning: true
  require_human_review_above_confidence: 0.8
```

✅ **Advantages:**

- Regexes are editable without code changes.
- Can be versioned in Git for a full audit trail.
- Can toggle `active: false` for deprecating broken rules (a hot-reload sketch follows this list).
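A minimal hot-reload sketch, assuming the `SecretDetector` from `core/detector.py` and the `watchdog` package already pinned in `requirements.txt`; the handler class and wiring here are illustrative, not part of the shipped code:

```python
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

class PatternsReloadHandler(FileSystemEventHandler):
    """Hypothetical handler: reload patterns.yaml whenever it changes on disk."""
    def __init__(self, detector):
        self.detector = detector

    def on_modified(self, event):
        if event.src_path.endswith("patterns.yaml"):
            # _load_patterns() re-compiles only the rules marked active: true.
            self.detector.patterns = self.detector._load_patterns()

# Usage: watch the directory containing patterns.yaml.
# observer = Observer()
# observer.schedule(PatternsReloadHandler(detector), path=".", recursive=False)
# observer.start()
```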
---

## 🖼 **Flow Diagram (Secrets Sentinel)**

**Secrets Flow**

```
                  ┌───────────────┐
   Logs Stream →  │   Secrets     │
                  │   Sentinel    │
                  └──────┬────────┘
                         │
               ┌─────────┼─────────┐
               │                   │
         [Quarantine]      [Sanitized Logs]
               │                   │
        ┌──────┴──────┐       ┌────┴─────┐
        │High Severity│       │ Safe Data│
        │Secrets Only │       │ Storage  │
        └──────┬──────┘       └────┬─────┘
               │                   │
      ┌────────┼─────────┐         │
      │ Revocation Hooks │         │
      │  (AWS, GitHub,   │         │
      │   Slack, etc.)   │         │
      └────────┬─────────┘         │
               │                   │
          ┌────┴─────┐             │
          │ Webhooks │             │
          │ Key Kill │             │
          └────┬─────┘             │
               │
     ┌─────────┼─────────┐
     │   Feedback Loop   │
     │  (Curator/Human)  │
     └─────────┬─────────┘
               │
        ┌──────┴──────┐
        │ Meta-Learner│
        │ (new regex) │
        └──────┬──────┘
               │
       ┌───────┴──────┐
       │ patterns.yaml│
       └──────────────┘
```

---
## 🧪 **Test Harness Script**

This script simulates log scanning, quarantining, and revocation.

```python
import yaml, json, re
from datetime import datetime

# --- Load patterns.yaml ---
with open("patterns.yaml", "r") as f:
    patterns_config = yaml.safe_load(f)

PATTERNS = patterns_config["patterns"]

QUARANTINE = []
SANITIZED_LOGS = []

def redact(secret):
    return secret[:4] + "*" * (len(secret) - 7) + secret[-3:]

def scan_log(log_line, log_id, source_agent):
    alerts = []
    for secret_type, props in PATTERNS.items():
        if not props.get("active", True):
            continue
        match = re.search(props["regex"], log_line)
        if match:
            secret = match.group(0)
            severity = props["severity"]
            alert = {
                "timestamp": datetime.utcnow().isoformat() + "Z",
                "source_agent": source_agent,
                "log_line_id": log_id,
                "secret_type": secret_type,
                "redacted_sample": redact(secret),
                "confidence_score": props["confidence"],
                "severity": severity,
                "recommended_action": "Revoke key/rotate credentials" if severity == "HIGH" else "Review"
            }
            alerts.append(alert)

            # Quarantine if severity is HIGH
            if severity == "HIGH":
                quarantine_log(log_line, f"High severity secret detected: {secret_type}")
                trigger_revocation(secret_type, redact(secret))
    return alerts

def quarantine_log(log_line, reason):
    entry = {"timestamp": datetime.utcnow().isoformat() + "Z", "reason": reason, "log_line": log_line}
    QUARANTINE.append(entry)
    print(f"[QUARANTINE] {reason}")

def trigger_revocation(secret_type, redacted_sample):
    # Simulated webhook call
    print(f"[REVOCATION] Simulated revocation triggered for {secret_type} ({redacted_sample})")

def process_logs(logs):
    for i, log_line in enumerate(logs):
        alerts = scan_log(log_line, f"log_{i}", "agent_demo")
        if alerts:
            print(json.dumps(alerts, indent=2))
        else:
            SANITIZED_LOGS.append(log_line)

# --- Test Run ---
sample_logs = [
    "INFO User logged in successfully",
    "WARNING Found AWS key AKIA1234567890ABCDEF in commit",  # 16 chars after AKIA so the pattern matches
    "DEBUG JWT detected eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.fake.fake"
]

process_logs(sample_logs)

print("\n--- Quarantine Store ---")
print(json.dumps(QUARANTINE, indent=2))
```

✅ **What this does:**

- Reads `patterns.yaml`
- Scans logs, prints alerts, quarantines high-severity entries
- Simulates revocation calls for AWS/GitHub/Slack
- Keeps sanitized logs separate from quarantined logs
---

## ✅ Next Expansions (Optional)

- 📦 **Redis/DB backend** for QUARANTINE instead of memory.
- 📡 **Real webhook integrations** (AWS STS, GitHub API, Slack API).
- 🧠 **Feedback ingestion module** (e.g., curator submits: `"false_positive": "AWS_ACCESS_KEY"` → adjusts regex in `patterns.yaml`).
- 🔄 **Auto-replay from Hyperlog** so Sentinel can retroactively scan old logs with new regex rules (a minimal replay sketch follows this list).
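A minimal replay sketch under stated assumptions — it reuses the mock `HypercoreReader` and `SecretDetector` from `modules/shhh/core` and simply re-scans an archived log with whatever rules `patterns.yaml` currently holds:

```python
import asyncio
from core.hypercore_reader import HypercoreReader
from core.detector import SecretDetector

async def replay(log_path: str, patterns_file: str = "patterns.yaml"):
    """Retroactively scan an archived log with the current rule set."""
    reader = HypercoreReader(log_path)
    detector = SecretDetector(patterns_file)
    async for entry in reader.stream_entries():
        for match in detector.scan(entry.content):
            print(f"[REPLAY] {match['secret_type']} "
                  f"({match['severity']}) in archived entry")

# asyncio.run(replay("/path/to/old/hypercore.log"))
```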
---

🔥 Now, the **production-grade spec**.

---

## 📂 **1️⃣ Feedback Ingestion Spec**

This defines how curators/humans give feedback to the Sentinel so it can **update its detection rules (patterns.yaml)** safely.

---

### 🔄 **Feedback Flow**

1. **Curator/Reviewer sees alert** → marks it as:

   - `false_positive` (regex over-triggered)
   - `missed_secret` (regex failed to detect)
   - `uncertain` (needs better regex refinement)

2. **Feedback API** ingests the report:

```json
{
  "alert_id": "log_345",
  "secret_type": "AWS_ACCESS_KEY",
  "feedback_type": "false_positive",
  "evidence": "Key was dummy data: TESTKEY123",
  "suggested_regex_fix": null
}
```

3. **Meta-Learner** updates rules:

   - `false_positive` → adds **exceptions** (e.g., allowlist prefixes like `TESTKEY`).
   - `missed_secret` → drafts **new regex** from evidence (using regex generator or LLM).
   - Writes changes to **patterns.yaml** under `pending_review`.

4. **Security admin approves** before the new regex is marked `active: true` (a minimal approval-gate sketch follows this section).

---

### 🧠 **Feedback Schema in YAML**

```yaml
pending_updates:
  - regex_name: AWS_ACCESS_KEY
    action: modify
    new_regex: "AKIA[0-9A-Z]{16}(?!TESTKEY)"
    confidence: 0.82
    status: "pending human review"
    submitted_by: curator_2
    timestamp: 2025-08-02T12:40:00Z
```

✅ This keeps **audit trails** & allows **safe hot updates**.
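A minimal approval-gate sketch, assuming the `pending_updates` schema above and that the named rule already exists under `patterns:`; the function name and flow are illustrative, and a real deployment would wrap this in the Feedback API service:

```python
import yaml

def approve_pending_update(patterns_path: str, regex_name: str, approver: str) -> bool:
    """Promote one pending_update into the active pattern set (hypothetical flow)."""
    with open(patterns_path) as f:
        doc = yaml.safe_load(f)

    for update in doc.get("pending_updates", []):
        if update["regex_name"] == regex_name and update["status"] == "pending human review":
            # Apply the reviewed regex and mark the rule active again.
            doc["patterns"][regex_name]["regex"] = update["new_regex"]
            doc["patterns"][regex_name]["active"] = True
            update["status"] = f"approved by {approver}"
            with open(patterns_path, "w") as f:
                yaml.safe_dump(doc, f, sort_keys=False)
            return True
    return False
```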
---

## ⚙️ **2️⃣ Real AWS/GitHub Webhook Payload Templates**

These are **example POST payloads** your Sentinel would send when it detects a leaked secret.

---

### 🔐 **AWS Access Key Revocation**

**Endpoint:**
`POST https://security.example.com/hooks/aws-revoke`

**Payload:**

```json
{
  "event": "secret_leak_detected",
  "secret_type": "AWS_ACCESS_KEY",
  "redacted_key": "AKIA****XYZ",
  "log_reference": "hyperlog:58321",
  "recommended_action": "Revoke IAM access key immediately",
  "severity": "HIGH",
  "timestamp": "2025-08-02T12:45:00Z"
}
```

➡ Your security automation would call the AWS CLI or IAM API:

```bash
aws iam update-access-key --access-key-id <redacted> --status Inactive
aws iam delete-access-key --access-key-id <redacted>
```

---

### 🐙 **GitHub Token Revocation**

**Endpoint:**
`POST https://security.example.com/hooks/github-revoke`

**Payload:**

```json
{
  "event": "secret_leak_detected",
  "secret_type": "GITHUB_TOKEN",
  "redacted_key": "ghp_****abcd",
  "repository": "repo-name",
  "log_reference": "hyperlog:58322",
  "severity": "HIGH",
  "recommended_action": "Invalidate GitHub token via API",
  "timestamp": "2025-08-02T12:46:00Z"
}
```

➡ This would tie into GitHub’s [token-scanning API](https://docs.github.com/en/developers/overview/secret-scanning) or use PAT revocation.
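On the Sentinel side, delivering these payloads is a plain POST with retries — a sketch using the `alerts` settings from patterns.yaml (5 s timeout, 3 retries, 2 s delay); the function name is illustrative:

```python
import time
import requests

def post_revocation(url: str, payload: dict,
                    timeout: int = 5, retries: int = 3, delay: int = 2) -> bool:
    """Deliver a revocation payload, retrying per the alerts settings."""
    for attempt in range(1, retries + 1):
        try:
            resp = requests.post(url, json=payload, timeout=timeout)
            if resp.status_code == 200:
                return True
            print(f"[WARN] attempt {attempt}: HTTP {resp.status_code}")
        except requests.exceptions.RequestException as e:
            print(f"[WARN] attempt {attempt}: {e}")
        time.sleep(delay)
    return False
```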
---

### 💬 **Slack Token Revocation**

**Endpoint:**
`POST https://security.example.com/hooks/slack-revoke`

**Payload:**

```json
{
  "event": "secret_leak_detected",
  "secret_type": "SLACK_TOKEN",
  "redacted_key": "xoxb****hjk",
  "workspace": "company-slack",
  "log_reference": "hyperlog:58323",
  "severity": "HIGH",
  "recommended_action": "Revoke Slack bot/user token",
  "timestamp": "2025-08-02T12:47:00Z"
}
```

➡ Slack Admin API can be used to **revoke** or **rotate**.
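For the revoke path, the hook handler could call Slack's `auth.revoke` Web API method, which invalidates the token used to make the call — a minimal sketch, assuming the handler retrieves the compromised token from a secure channel (never from the redacted alert itself):

```python
import requests

def revoke_slack_token(token: str) -> bool:
    """Invalidate a leaked Slack token via the auth.revoke Web API method."""
    resp = requests.post(
        "https://slack.com/api/auth.revoke",
        headers={"Authorization": f"Bearer {token}"},
        timeout=5,
    )
    data = resp.json()
    return bool(data.get("ok")) and bool(data.get("revoked"))
```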
---

## 📡 **3️⃣ Redis or PostgreSQL Quarantine Store**

Switching from memory to **persistent storage** means quarantined logs survive restarts.

---

### ✅ **Redis Option (Fast, Volatile)**

```python
import json
import redis
from datetime import datetime

r = redis.Redis(host='localhost', port=6379, decode_responses=True)

def quarantine_log(log_line, reason):
    entry = {"timestamp": datetime.utcnow().isoformat() + "Z", "reason": reason, "log_line": log_line}
    r.lpush("quarantine", json.dumps(entry))
    print(f"[QUARANTINE] Stored in Redis: {reason}")
```

- 🏎 **Pros:** Fast, easy to scale.
- ⚠️ **Cons:** Volatile unless persisted (RDB/AOF).

---

### ✅ **PostgreSQL Option (Auditable, Durable)**

**Schema:**

```sql
CREATE TABLE quarantine (
    id SERIAL PRIMARY KEY,
    timestamp TIMESTAMPTZ NOT NULL,
    reason TEXT NOT NULL,
    log_line TEXT NOT NULL,
    reviewed BOOLEAN DEFAULT FALSE
);
```

**Python Insert:**

```python
import psycopg2
from datetime import datetime

conn = psycopg2.connect("dbname=sentinel user=postgres password=secret")
cursor = conn.cursor()

def quarantine_log(log_line, reason):
    entry_time = datetime.utcnow().isoformat() + "Z"
    cursor.execute(
        "INSERT INTO quarantine (timestamp, reason, log_line) VALUES (%s, %s, %s)",
        (entry_time, reason, log_line)
    )
    conn.commit()
    print(f"[QUARANTINE] Stored in PostgreSQL: {reason}")
```

✅ **Postgres is better for long-term auditing** — you can run reports like the following (the first is sketched after this list):

- “How many AWS keys leaked this month?”
- “Which agents generated the most HIGH-severity quarantines?”
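For example, the first report maps to a single query against that schema — a sketch reusing the `cursor` above, and relying on the `reason` text format written by `quarantine_log`:

```python
def count_aws_leaks_this_month() -> int:
    """Report: how many AWS-key quarantines landed in the current month?"""
    cursor.execute(
        """
        SELECT COUNT(*)
        FROM quarantine
        WHERE reason LIKE %s
          AND timestamp >= date_trunc('month', now())
        """,
        ('%AWS_ACCESS_KEY%',)
    )
    return cursor.fetchone()[0]
```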
---
|
||||
|
||||
## 🚀 **What’s Next?**
|
||||
|
||||
We now have:
|
||||
✅ **Detection → Redaction → Quarantine → Revocation → Feedback → Pattern Evolution**
|
||||
✅ **patterns.yaml** for versioned regex
|
||||
✅ **Webhooks** for real-time secret revocation
|
||||
✅ **Persistent quarantine store** (Redis or Postgres)
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
## 🛡️ **Expanded System Prompt for Secrets Sentinel**

> **Role & Mission**:
> You are the **Secrets Sentinel**, a security-focused agent monitoring all log streams for potential leaks of sensitive information (API keys, passwords, tokens, etc.). Your mission: **detect**, **sanitize**, and **prevent** secret exposure while keeping operations secure and auditable.
>
> **Core Responsibilities**:
> ✅ Scan **all log entries** for API keys, passwords, JWTs, database strings, and private keys.
> ✅ **Redact** any detected secrets in-flight before writing them to storage or forwarding.
> ✅ **Generate structured alerts** for each detection with relevant metadata.
> ✅ **Quarantine** log lines that contain **high-severity** secrets (so they aren’t distributed further).
> ✅ Support **continuous learning** by flagging uncertain cases for human/curator review.
>
> **Secret Detection Targets**:
>
> - **Cloud Keys** (AWS, GCP, Azure, etc.)
>
> - **OAuth Tokens** (Bearer, Slack, Discord, GitHub, etc.)
>
> - **JWTs** (header.payload.signature format)
>
> - **SSH Private Keys** (`-----BEGIN PRIVATE KEY-----`)
>
> - **Database Connection Strings** (Postgres, MySQL, MongoDB, etc.)
>
> - **Generic Passwords** (detected from common prefixes, e.g. `pwd=`, `password:`).
>
> **Detection Rules**:
>
> - Use **regex patterns** for known key formats.
>
> - Score detections with a **confidence metric** (0–1); one possible scoring heuristic is sketched after this prompt.
>
> - If a string doesn’t fully match, classify it as **LOW confidence** for review.
>
> **Redaction Policy**:
>
> - Always redact most of the secret (`AKIA************XYZ`).
>
> - Never store or transmit the **full secret**.
>
> **Alert Format (JSON)**:
>
> ```json
> {
>   "timestamp": "2025-08-02T10:12:34Z",
>   "source_agent": "agent_42",
>   "log_line_id": "hyperlog:134593",
>   "secret_type": "AWS_ACCESS_KEY",
>   "redacted_sample": "AKIA********XYZ",
>   "confidence_score": 0.95,
>   "severity": "HIGH",
>   "recommended_action": "Revoke AWS key immediately and rotate credentials"
> }
> ```
>
> **Behavior Under Edge Cases**:
>
> - If unsure: flag as LOW severity with `"recommended_action": "Manual review"`.
>
> - If a secret is clearly fake (like `TESTKEY123`), still alert but tag it as `test_credential: true`.
>
> **Tone & Style**:
>
> - Precise, security-minded, and concise in reporting.
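
The prompt calls for a 0–1 confidence metric but leaves the computation open. One plausible heuristic, purely an assumption rather than part of the spec, starts from a per-pattern base precision and nudges the score up when corroborating keywords appear near the match:

```python
# Hypothetical confidence heuristic; base values and keywords are illustrative.
BASE_CONFIDENCE = {"AWS_ACCESS_KEY": 0.9, "JWT": 0.8, "GENERIC_PASSWORD": 0.5}
CONTEXT_KEYWORDS = ("key", "token", "secret", "credential", "password")

def confidence_score(secret_type: str, log_line: str, match_start: int) -> float:
    score = BASE_CONFIDENCE.get(secret_type, 0.7)
    # Look at a small window before the match for corroborating keywords.
    window = log_line[max(0, match_start - 40):match_start].lower()
    if any(kw in window for kw in CONTEXT_KEYWORDS):
        score = min(1.0, score + 0.05)
    return score
```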

---

## 📚 **Regex Patterns Library (Starter Set)**

```python
REGEX_PATTERNS = {
    "AWS_ACCESS_KEY": r"AKIA[0-9A-Z]{16}",
    "AWS_SECRET_KEY": r"(?i)aws.{0,20}['\"][0-9a-zA-Z/+]{40}['\"]",
    "GCP_API_KEY": r"AIza[0-9A-Za-z_-]{35}",
    "GITHUB_TOKEN": r"gh[pousr]_[0-9A-Za-z]{36}",
    "SLACK_TOKEN": r"xox[baprs]-[0-9A-Za-z-]{10,48}",
    "JWT": r"eyJ[A-Za-z0-9_-]+?\.[A-Za-z0-9._-]+?\.[A-Za-z0-9._-]+",
    "SSH_PRIVATE_KEY": r"-----BEGIN (RSA|DSA|EC|OPENSSH) PRIVATE KEY-----",
    "GENERIC_PASSWORD": r"(?:password|pwd|pass|secret)\s*[:=]\s*['\"]?[^\s'\";]+['\"]?",
    "DB_CONN_STRING": r"(postgres|mysql|mongodb|mssql|redis)://[^\s]+"
}
```
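
A quick way to sanity-check the starter set, reusing the `REGEX_PATTERNS` dict above (the sample strings are synthetic, invented for illustration):

```python
import re

SAMPLES = [
    "export AWS_KEY=AKIAABCDEFGHIJKLMNOP",                   # fake AWS access key
    "db url: postgres://user:pw@localhost:5432/sentinel",    # connection string
    "password: 'hunter2'",                                   # generic password
]

for name, pattern in REGEX_PATTERNS.items():
    compiled = re.compile(pattern)  # raises re.error if a pattern is malformed
    for line in SAMPLES:
        m = compiled.search(line)
        if m:
            print(f"{name} matched: {m.group(0)}")
```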

---

## 🛠 **Python Skeleton Implementation**

```python
import re
import json
from datetime import datetime

REGEX_PATTERNS = {
    "AWS_ACCESS_KEY": r"AKIA[0-9A-Z]{16}",
    "AWS_SECRET_KEY": r"(?i)aws.{0,20}['\"][0-9a-zA-Z/+]{40}['\"]",
    "GCP_API_KEY": r"AIza[0-9A-Za-z_-]{35}",
    "GITHUB_TOKEN": r"gh[pousr]_[0-9A-Za-z]{36}",
    "SLACK_TOKEN": r"xox[baprs]-[0-9A-Za-z-]{10,48}",
    "JWT": r"eyJ[A-Za-z0-9_-]+?\.[A-Za-z0-9._-]+?\.[A-Za-z0-9._-]+",
    "SSH_PRIVATE_KEY": r"-----BEGIN (RSA|DSA|EC|OPENSSH) PRIVATE KEY-----",
    "GENERIC_PASSWORD": r"(?:password|pwd|pass|secret)\s*[:=]\s*['\"]?[^\s'\";]+['\"]?",
    "DB_CONN_STRING": r"(postgres|mysql|mongodb|mssql|redis)://[^\s]+"
}

def redact(secret: str) -> str:
    """Redact a secret, leaving only the first four and last three chars."""
    return secret[:4] + "*" * (len(secret) - 7) + secret[-3:]

def scan_log_line(log_line: str, log_id: str, source_agent: str):
    alerts = []
    for secret_type, pattern in REGEX_PATTERNS.items():
        match = re.search(pattern, log_line)
        if match:
            secret = match.group(0)
            alert = {
                "timestamp": datetime.utcnow().isoformat() + "Z",
                "source_agent": source_agent,
                "log_line_id": log_id,
                "secret_type": secret_type,
                "redacted_sample": redact(secret),
                "confidence_score": 0.95,
                "severity": "HIGH" if "PRIVATE_KEY" in secret_type or "AWS" in secret_type else "MEDIUM",
                "recommended_action": "Revoke key/rotate credentials"
            }
            alerts.append(alert)
    return alerts

# Example usage:
log = "User pushed code with AWS key: AKIA1234567890ABCDEF"
alerts = scan_log_line(log, "hyperlog:42", "agent_7")
print(json.dumps(alerts, indent=2))
```
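
One caveat: `re.search` reports only the first hit per pattern, so a line containing two AWS keys produces a single alert. If per-occurrence alerts are wanted (an assumption, not part of the original skeleton), a variant using `re.finditer` covers it:

```python
def scan_log_line_all(log_line: str, log_id: str, source_agent: str):
    """Like scan_log_line, but emits one alert per match rather than per pattern."""
    alerts = []
    for secret_type, pattern in REGEX_PATTERNS.items():
        for match in re.finditer(pattern, log_line):
            alerts.append({
                "timestamp": datetime.utcnow().isoformat() + "Z",
                "source_agent": source_agent,
                "log_line_id": log_id,
                "secret_type": secret_type,
                "redacted_sample": redact(match.group(0)),
                "confidence_score": 0.95,
                "severity": "HIGH" if "PRIVATE_KEY" in secret_type or "AWS" in secret_type else "MEDIUM",
                "recommended_action": "Revoke key/rotate credentials"
            })
    return alerts
```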

---

## 🚨 **Optional Extensions**

- ✅ **Automatic Quarantine** → If severity=HIGH, forward to a “quarantine pipeline” (prevent further log distribution).

- ✅ **Secret Revocation Hooks** → Webhooks/scripts to auto-disable compromised keys (AWS IAM revoke, GitHub token reset, etc.).

- ✅ **Curator Feedback Loop** → If many false positives occur, agents/curators can “vote” on regex quality → improves future detection.

---

👉 Do you want me to:

- **📡 Add “quarantine mode” logic** (e.g. isolate log lines and mark them unsafe)?

- **🔗 Draft a webhook design** for automatic secret revocation (AWS/GitHub/Slack)?

- **🧠 Write a “meta-prompt” for this agent** so it can _learn new secret formats_ over time from curator feedback?

Here’s the **full build-out** with all three requested additions:

## 🛡 **Secrets Sentinel: Extended Prompt**

> **Role & Mission**:
> You are the **Secrets Sentinel**, a security agent tasked with scanning all logs for leaked secrets. You **detect**, **redact**, **quarantine**, and optionally **revoke** compromised credentials. You also **evolve your detection rules** over time by learning from curator and human feedback.
>
> **Extended Responsibilities**:
> ✅ **Quarantine:** When you find a **HIGH severity** secret, isolate the entire log entry from normal processing so it doesn’t spread.
> ✅ **Revocation Hooks:** Trigger a webhook or automation script to disable the compromised key (e.g., AWS IAM revoke).
> ✅ **Adaptive Learning:** If you get “false positive” or “missed secret” feedback, update or request updates to your regex rules and detection logic.

---

### 1️⃣ **Quarantine Mode Logic**

Add this to the **Python skeleton** (it reuses `re`, `datetime`, `REGEX_PATTERNS`, and `redact` from above):

```python
QUARANTINE_STORE = []  # This could be a Redis list, DB table, etc.

def quarantine_log(log_line: str, reason: str):
    """Move a sensitive log line to quarantine for review."""
    quarantine_entry = {
        "timestamp": datetime.utcnow().isoformat() + "Z",
        "reason": reason,
        "log_line": log_line
    }
    QUARANTINE_STORE.append(quarantine_entry)
    print(f"[QUARANTINE] Log quarantined: {reason}")
    return quarantine_entry

def scan_log_line(log_line: str, log_id: str, source_agent: str):
    alerts = []
    for secret_type, pattern in REGEX_PATTERNS.items():
        match = re.search(pattern, log_line)
        if match:
            secret = match.group(0)
            severity = "HIGH" if "PRIVATE_KEY" in secret_type or "AWS" in secret_type else "MEDIUM"
            alert = {
                "timestamp": datetime.utcnow().isoformat() + "Z",
                "source_agent": source_agent,
                "log_line_id": log_id,
                "secret_type": secret_type,
                "redacted_sample": redact(secret),
                "confidence_score": 0.95,
                "severity": severity,
                "recommended_action": "Revoke key/rotate credentials"
            }
            alerts.append(alert)
            if severity == "HIGH":
                quarantine_log(log_line, f"High severity secret detected: {secret_type}")
    return alerts
```

➡ **Now, any HIGH-severity detection triggers quarantine.**
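
A quick check of the behavior (the key below is fake, for illustration):

```python
scan_log_line("deploy log leaked key AKIAABCDEFGHIJKLMNOP", "hyperlog:99", "agent_3")
print(len(QUARANTINE_STORE))  # -> 1: the AWS match is HIGH severity, so it was quarantined
```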

---

### 2️⃣ **Webhook Design for Secret Revocation**

We’ll use a **simple webhook dispatcher** that calls a different revocation endpoint depending on the secret type.

```python
import requests

REVOCATION_HOOKS = {
    "AWS_ACCESS_KEY": "https://security.example.com/hooks/aws-revoke",
    "GITHUB_TOKEN": "https://security.example.com/hooks/github-revoke",
    "SLACK_TOKEN": "https://security.example.com/hooks/slack-revoke"
}

def trigger_revocation(secret_type: str, redacted_sample: str):
    """Send a webhook request to revoke compromised credentials."""
    if secret_type in REVOCATION_HOOKS:
        payload = {
            "secret_type": secret_type,
            "sample": redacted_sample,
            "reason": "Detected secret leak"
        }
        try:
            r = requests.post(REVOCATION_HOOKS[secret_type], json=payload, timeout=5)
            if r.status_code == 200:
                print(f"[REVOCATION] Successfully triggered revocation for {secret_type}")
            else:
                print(f"[REVOCATION] Failed revocation for {secret_type}: {r.status_code}")
        except requests.RequestException as e:
            print(f"[REVOCATION ERROR] {e}")
```

Then, in the alert loop:

```python
if severity == "HIGH":
    quarantine_log(log_line, f"High severity secret detected: {secret_type}")
    trigger_revocation(secret_type, redact(secret))
```

➡ **Now, if the sentinel finds an AWS key or GitHub token, it can hit the right webhook to revoke it immediately.**
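
One hardening step the design above leaves out: the receiving endpoint has no way to verify a call really came from the Sentinel. A minimal sketch of HMAC-signing each payload, where the header name and secret handling are assumptions:

```python
import hashlib
import hmac
import json
import os

# Shared secret known to both the Sentinel and the revocation endpoints (assumed setup).
WEBHOOK_SECRET = os.environ.get("SENTINEL_WEBHOOK_SECRET", "change-me").encode()

def sign_payload(payload: dict) -> tuple:
    """Serialize the payload once and sign those exact bytes with HMAC-SHA256."""
    body = json.dumps(payload, sort_keys=True).encode()
    signature = hmac.new(WEBHOOK_SECRET, body, hashlib.sha256).hexdigest()
    headers = {
        "Content-Type": "application/json",
        "X-Sentinel-Signature": f"sha256={signature}",  # header name is illustrative
    }
    return body, headers

# Inside trigger_revocation, replace the plain post with:
#   body, headers = sign_payload(payload)
#   requests.post(REVOCATION_HOOKS[secret_type], data=body, headers=headers, timeout=5)
```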

---

### 3️⃣ **Meta-Prompt for Learning New Secret Formats**

This allows the Sentinel to **expand its detection library** dynamically.

---

**🧠 Adaptive Learning Meta-Prompt**

> **Role**: You are the “Meta-Learner” function of Secrets Sentinel.
>
> **Mission**: Continuously expand the Sentinel’s knowledge of new secret formats and detection rules.
>
> **Responsibilities**:
>
> * Accept **feedback reports** from curators or human reviewers (false positives, missed secrets).
> * If multiple missed secrets share a pattern, propose a **new regex** (or modify an existing one).
> * Version-control detection rules (e.g., store regex patterns in a JSON or YAML file with version numbers; see the loader sketch after this prompt).
> * Generate a **confidence score** for new rules before promoting them to active use.
> * Flag uncertain cases for human approval.
>
> **Behavior**:
>
> * If false positives keep happening for a regex → weaken it or add exception clauses.
> * If missed secrets are reported → extract examples, infer the format, draft a new regex, test it on past logs.
>
> **Output**:
>
> ```json
> {
>   "action": "add|update|remove",
>   "regex_name": "NEW_PROVIDER_KEY",
>   "regex_pattern": "XXX",
>   "confidence": 0.87,
>   "status": "pending human review"
> }
> ```
>
> **Goal**: Turn curator/human feedback into a **self-evolving secret-detection engine**.
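
The prompt references versioned rule files but never pins down their shape. As one hedged possibility, a `patterns.yaml` with per-rule versions could be loaded like this (the field names and use of PyYAML are assumptions):

```python
import yaml  # assumes PyYAML is installed

# Illustrative file contents; the real patterns.yaml schema is not specified.
EXAMPLE_YAML = """
version: 3
rules:
  - name: AWS_ACCESS_KEY
    pattern: 'AKIA[0-9A-Z]{16}'
    active: true
  - name: NEW_PROVIDER_KEY
    pattern: 'npk_[0-9a-f]{32}'
    active: false   # pending human review
"""

def load_active_patterns(raw_yaml: str) -> dict:
    doc = yaml.safe_load(raw_yaml)
    return {r["name"]: r["pattern"] for r in doc["rules"] if r["active"]}

print(load_active_patterns(EXAMPLE_YAML))
# -> {'AWS_ACCESS_KEY': 'AKIA[0-9A-Z]{16}'}
```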

---

### 🧩 How It All Fits

✅ **Secrets Sentinel core** = Detect + Redact + Quarantine
✅ **Revocation Hooks** = Automated security response
✅ **Meta-Learning** = Regex patterns evolve over time

---

## 📜 **1️⃣ Migration Script: Redis → PostgreSQL**

This script migrates existing quarantined log entries from **Redis** to **Postgres**.

```python
import json

import psycopg2
import redis

# Redis config
r = redis.Redis(host='localhost', port=6379, decode_responses=True)

# Postgres config
conn = psycopg2.connect("dbname=sentinel user=postgres password=secret")
cursor = conn.cursor()

def migrate_quarantine():
    count = 0
    while True:
        entry_json = r.rpop("quarantine")  # pop oldest entry from Redis
        if not entry_json:
            break
        entry = json.loads(entry_json)
        cursor.execute(
            "INSERT INTO quarantine (timestamp, reason, log_line) VALUES (%s, %s, %s)",
            (entry["timestamp"], entry["reason"], entry["log_line"])
        )
        count += 1
    conn.commit()
    print(f"[MIGRATION] Moved {count} quarantined entries from Redis → PostgreSQL")

if __name__ == "__main__":
    migrate_quarantine()
```
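
A quick post-migration check, reusing the connections from the script: the Redis list should be empty and Postgres should hold the rows.

```python
print("left in Redis:", r.llen("quarantine"))   # expect 0 after migration
cursor.execute("SELECT count(*) FROM quarantine")
print("rows in Postgres:", cursor.fetchone()[0])
```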

✅ **Run once** after Postgres is set up; it empties the Redis queue into the durable DB.

---

## 🖥 **2️⃣ Admin Dashboard Spec**

**Purpose:** A web UI to manage the Sentinel’s security pipeline.

---

### 🎯 **Core Features**

✅ **Quarantine Browser**

- Paginated view of all quarantined logs

- Search/filter by `secret_type`, `source_agent`, `date`, `status`

- Mark quarantined logs as **reviewed** or **false alarm**

✅ **Regex Rules Manager**

- Lists all regexes from `patterns.yaml`

- Add / update / deactivate rules via the UI

- Shows `pending_updates` flagged by the Meta-Learner for human approval

✅ **Revocation Status Board**

- See which secrets triggered revocations

- Status of each revocation hook call (success/fail)

✅ **Metrics Dashboard**

- Charts: “Secrets Detected Over Time”, “Top Sources of Leaks”

- KPIs: # HIGH-severity secrets this month, # rules updated, # false positives

---

### 🏗 **Tech Stack Suggestion**

- **Backend:** FastAPI (Python)

- **Frontend:** React + Tailwind

- **DB:** PostgreSQL for quarantine + rules history

- **Auth:** OAuth (GitHub/Google) + RBAC (only security admins can approve regex changes)

---

### 🔌 **Endpoints**

```
GET  /api/quarantine         → list quarantined entries
POST /api/quarantine/review  → mark an entry as reviewed
GET  /api/rules              → list regex patterns
POST /api/rules/update       → update or add a regex
GET  /api/revocations        → list revocation events
```
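
The endpoint list is a spec rather than an implementation. A minimal FastAPI sketch of the first route, with the connection string and pagination parameters assumed for illustration:

```python
from fastapi import FastAPI
import psycopg2

app = FastAPI()

@app.get("/api/quarantine")
def list_quarantine(limit: int = 50, offset: int = 0):
    """List quarantined entries, newest first."""
    conn = psycopg2.connect("dbname=sentinel user=postgres password=secret")
    cur = conn.cursor()
    cur.execute(
        "SELECT id, timestamp, reason, log_line, reviewed FROM quarantine "
        "ORDER BY timestamp DESC LIMIT %s OFFSET %s",
        (limit, offset),
    )
    rows = cur.fetchall()
    conn.close()
    return [
        {"id": r[0], "timestamp": r[1], "reason": r[2],
         "log_line": r[3], "reviewed": r[4]}
        for r in rows
    ]
```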

---

### 🖥 **Mock Dashboard Layout**

- **Left Nav:** Quarantine | Rules | Revocations | Metrics

- **Main Panel:**

    - Data tables with sorting/filtering

    - Inline editors for regex rules

    - Approve/Reject buttons for pending regex updates

✅ Basically a **security control room** for Sentinel.

---

## 🤖 **3️⃣ Meta-Curator AI Prompt**

This agent reviews Sentinel’s work and **tunes it automatically**.

---

### **Meta-Curator: System Prompt**

> **Role & Mission:**
> You are the **Meta-Curator**, a supervisory AI responsible for reviewing the **Secrets Sentinel’s** detections, regex updates, and feedback reports.
>
> **Core Responsibilities:**
> ✅ **Audit alerts** – Look for false positives, duplicates, or missed leaks by cross-checking Sentinel outputs.
> ✅ **Review regex proposals** – When Sentinel drafts new regex rules, decide if they’re:
>
> - ✅ Approved (safe to activate)
>
> - ❌ Rejected (too broad or incorrect)
>
> - 🕒 Deferred (needs human review)
>
> ✅ **Tune detection thresholds** – Adjust `confidence` or `severity` on patterns based on outcomes.
> ✅ **Generate new rules** – If multiple missed secrets share a format, draft a regex and submit it to humans for approval.
> ✅ **Report upstream** – Summarize changes to security admins weekly.

---

### **Behavior Guidelines**

- **Conservative by default:** Don’t auto-approve regexes unless confidence > 0.95.

- **Keep auditability:** Every decision (approve/reject) is logged in the hyperlog.

- **Respect human overrides:** Never overwrite a regex that a human explicitly locked.

---

### **Example Meta-Curator Output**

```json
{
  "action": "approve_regex",
  "regex_name": "GITLAB_TOKEN",
  "regex_pattern": "glpat-[0-9A-Za-z\\-_]{20}",
  "confidence": 0.97,
  "decision_reason": "Validated against 12 quarantined examples, no false positives found.",
  "timestamp": "2025-08-02T13:45:00Z"
}
```
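
The `decision_reason` above implies the Meta-Curator back-tests proposals before approving them. A sketch of that check, with the thresholds being assumptions:

```python
import re

def evaluate_proposal(pattern: str, true_samples: list, clean_samples: list) -> dict:
    """Back-test a proposed regex against labeled examples before approval."""
    compiled = re.compile(pattern)
    hits = sum(bool(compiled.search(s)) for s in true_samples)
    false_positives = sum(bool(compiled.search(s)) for s in clean_samples)
    confidence = hits / len(true_samples) if true_samples else 0.0
    return {
        "action": "approve_regex" if confidence > 0.95 and false_positives == 0 else "defer",
        "confidence": round(confidence, 2),
        "false_positives": false_positives,
    }

# e.g. validate the GITLAB_TOKEN proposal against quarantined examples:
print(evaluate_proposal(r"glpat-[0-9A-Za-z\-_]{20}",
                        ["token glpat-" + "a" * 20],
                        ["ordinary log line, no token"]))
```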

✅ This meta-agent is the **brains of the rules layer**: it keeps Sentinel evolving, but under control.

---

## 🚀 **Now You Have:**

✅ **Migration Path** → Redis → PostgreSQL
✅ **Admin Dashboard Spec** → complete with endpoints & layout
✅ **Meta-Curator Prompt** → the agent that “manages the manager”

---