Pre-cleanup snapshot - all current files

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
tony
2025-08-05 02:32:45 +10:00
parent 26079aa8da
commit 4511f4c801
32 changed files with 5072 additions and 0 deletions

1
.obsidian/app.json vendored Normal file

@@ -0,0 +1 @@
{}

1
.obsidian/appearance.json vendored Normal file

@@ -0,0 +1 @@
{}

31
.obsidian/core-plugins.json vendored Normal file

@@ -0,0 +1,31 @@
{
"file-explorer": true,
"global-search": true,
"switcher": true,
"graph": true,
"backlink": true,
"canvas": true,
"outgoing-link": true,
"tag-pane": true,
"properties": false,
"page-preview": true,
"daily-notes": true,
"templates": true,
"note-composer": true,
"command-palette": true,
"slash-command": false,
"editor-status": true,
"bookmarks": true,
"markdown-importer": false,
"zk-prefixer": false,
"random-note": false,
"outline": true,
"word-count": true,
"slides": false,
"audio-recorder": false,
"workspaces": false,
"file-recovery": true,
"publish": false,
"sync": true,
"webviewer": false
}

206
.obsidian/workspace.json vendored Normal file

@@ -0,0 +1,206 @@
{
"main": {
"id": "49f18c78518039c8",
"type": "split",
"children": [
{
"id": "fea88e09bce7fef2",
"type": "tabs",
"children": [
{
"id": "472092e9ada7a8e6",
"type": "leaf",
"state": {
"type": "empty",
"state": {},
"icon": "lucide-file",
"title": "New tab"
}
}
]
}
],
"direction": "vertical"
},
"left": {
"id": "b510957437397946",
"type": "split",
"children": [
{
"id": "157d722a91bc8e15",
"type": "tabs",
"children": [
{
"id": "9001986372506f85",
"type": "leaf",
"state": {
"type": "file-explorer",
"state": {
"sortOrder": "alphabetical",
"autoReveal": false
},
"icon": "lucide-folder-closed",
"title": "Files"
}
},
{
"id": "1d6f26c2d2402f8e",
"type": "leaf",
"state": {
"type": "search",
"state": {
"query": "",
"matchingCase": false,
"explainSearch": false,
"collapseAll": false,
"extraContext": false,
"sortOrder": "alphabetical"
},
"icon": "lucide-search",
"title": "Search"
}
},
{
"id": "5b830db8721ad2ed",
"type": "leaf",
"state": {
"type": "bookmarks",
"state": {},
"icon": "lucide-bookmark",
"title": "Bookmarks"
}
}
]
}
],
"direction": "horizontal",
"width": 481.5
},
"right": {
"id": "a1ab5e22b95db49c",
"type": "split",
"children": [
{
"id": "245785f7c0bf960b",
"type": "tabs",
"children": [
{
"id": "31a2e09288336a61",
"type": "leaf",
"state": {
"type": "backlink",
"state": {
"file": "modules/whoosh/backend/DOCUMENTATION_SUMMARY.md",
"collapseAll": false,
"extraContext": false,
"sortOrder": "alphabetical",
"showSearch": true,
"searchQuery": "",
"backlinkCollapsed": false,
"unlinkedCollapsed": true
},
"icon": "links-coming-in",
"title": "Backlinks for DOCUMENTATION_SUMMARY"
}
},
{
"id": "57211ee20d0c9d61",
"type": "leaf",
"state": {
"type": "outgoing-link",
"state": {
"file": "modules/whoosh/backend/DOCUMENTATION_SUMMARY.md",
"linksCollapsed": false,
"unlinkedCollapsed": true
},
"icon": "links-going-out",
"title": "Outgoing links from DOCUMENTATION_SUMMARY"
}
},
{
"id": "071c40df45653454",
"type": "leaf",
"state": {
"type": "tag",
"state": {
"sortOrder": "frequency",
"useHierarchy": true,
"showSearch": false,
"searchQuery": ""
},
"icon": "lucide-tags",
"title": "Tags"
}
},
{
"id": "1a55201803c42e38",
"type": "leaf",
"state": {
"type": "outline",
"state": {
"file": "modules/whoosh/backend/DOCUMENTATION_SUMMARY.md",
"followCursor": false,
"showSearch": false,
"searchQuery": ""
},
"icon": "lucide-list",
"title": "Outline of DOCUMENTATION_SUMMARY"
}
}
]
}
],
"direction": "horizontal",
"width": 300,
"collapsed": true
},
"left-ribbon": {
"hiddenItems": {
"switcher:Open quick switcher": false,
"graph:Open graph view": false,
"canvas:Create new canvas": false,
"daily-notes:Open today's daily note": false,
"templates:Insert template": false,
"command-palette:Open command palette": false
}
},
"active": "9001986372506f85",
"lastOpenFiles": [
"modules/slurp/hcfs-python/hcfs/core/__pycache__/filesystem.cpython-310.pyc",
"modules/slurp/hcfs-python/hcfs/core/__pycache__/context_db.cpython-310.pyc",
"modules/slurp/hcfs-python/hcfs/core/__pycache__/__init__.cpython-310.pyc",
"modules/slurp/hcfs-python/hcfs/core/__pycache__",
"modules/slurp/hcfs-python/hcfs/__pycache__/__init__.cpython-310.pyc",
"modules/slurp/hcfs-python/hcfs/__pycache__",
"modules/whoosh/EVENT_CONFIGURATION_SYSTEM.md",
"modules/whoosh/EVENT_CONFIGURATION_SYSTEM.md.tmp.1675830.1754294063541",
"modules/whoosh/frontend/src/test/event-config-integration.test.ts",
"modules/whoosh/frontend/src/test/event-config-integration.test.ts.tmp.1675830.1754293976289",
"modules/whoosh/frontend/src/components/projects/EventTypeConfiguration.tsx",
"modules/whoosh/frontend/src/components/projects/EventTypeConfiguration.tsx.tmp.1675830.1754293868591",
"homepage-content.md",
"modules/posthuman/docs/operations.md",
"modules/posthuman/docs/development.md",
"modules/posthuman/docs/api.md",
"modules/posthuman/docs/deployment.md",
"modules/posthuman/docs/architecture.md",
"modules/posthuman/conductor-kernel/PERFORMANCE_OPTIMIZATION.md",
"modules/posthuman/PROJECT_PLAN.md",
"modules/posthuman/README.md",
"modules/hmmm/PROJECT_PLAN.md",
"modules/whoosh/backend/DEPLOYMENT_FIXES.md",
"modules/whoosh/backend/DOCUMENTATION_SUMMARY.md",
"modules/whoosh/docs/project-complete.md",
"modules/whoosh/docs/environment-requirements.md",
"modules/whoosh/docs/implementation-complete.md",
"modules/whoosh/docs/LOCAL_DEVELOPMENT.md",
"modules/whoosh/docs/phase3-completion-summary.md",
"modules/whoosh/docs/phase4-completion-summary.md",
"modules/whoosh/docs/phase5-completion-summary.md",
"modules/whoosh/frontend/TESTING.md",
"modules/whoosh/results/rosewood_qa_report_1751891435.md",
"modules/whoosh/TESTING_STRATEGY.md",
"modules/whoosh/REPORT.md",
"modules/whoosh/README_DISTRIBUTED.md"
]
}

184
homepage-content.md Normal file

@@ -0,0 +1,184 @@
# CHORUS Services - Homepage Content
## Hero Section
### Primary Headline
**AI Development Teams That Think, Learn, and Optimize Themselves**
### Secondary Headline
The next evolution in AI orchestration: Self-optimizing agents that dynamically build optimal teams, learn from every interaction, and deliver auditable results with complete traceability.
### Value Proposition
CHORUS Services transforms how AI development works. Our breakthrough orchestration platform creates autonomous development teams that continuously improve their own performance, automatically form optimal team compositions, and maintain complete audit trails of every decision.
---
## Key Innovations Section
### Self-Optimizing Intelligence
**AI agents that get better with every task**
Our breakthrough reinforcement learning system enables agents to continuously optimize their own performance through real-time feedback loops. Each completed task makes the entire system more effective.
- **Sub-5ms task routing** with intelligent load balancing
- **48GB distributed GPU infrastructure** for massive parallel processing
- **Enterprise-grade monitoring** with real-time optimization
### Dynamic Team Formation
**Perfect teams, automatically assembled**
Gone are the days of manually coordinating AI tools. CHORUS agents autonomously analyze task requirements and automatically form optimal team compositions from our 8 specialized agent roles.
- **Composable context management** - Knowledge components mix and match across projects
- **Fine-tuned specialized models** optimized for specific development workflows
- **Real-time team rebalancing** based on workload and capabilities
### Complete Auditability
**Every decision traceable, every solution replayable**
Enterprise development demands transparency. CHORUS provides complete traceability of every decision with the ability to replay and understand exactly how solutions were developed.
- **Immutable decision logs** with cryptographic integrity
- **Full solution replay capability** for debugging and compliance
- **End-to-end workflow transparency** for regulatory requirements
---
## Target Audience Benefits
### For Enterprise Development Teams
**10x your development velocity without losing control**
- Autonomous task distribution across optimal AI team compositions
- Complete audit trails for compliance and quality assurance
- Integration with existing enterprise development workflows
- Real-time performance monitoring and optimization
### For Tech Startups
**Compete with larger teams through AI force multiplication**
- Small team leverage through intelligent task orchestration
- Automatic knowledge capture and reuse across projects
- Cost-effective scaling without proportional headcount increases
- Rapid iteration with continuous system improvement
### For Research Organizations
**Auditable, repeatable AI-assisted research processes**
- Complete reproducibility of AI-assisted research workflows
- Transparent decision-making processes for peer review
- Collaborative reasoning between multiple specialized AI agents
- Long-term knowledge accumulation and institutional memory
### For AI Companies
**Cutting-edge orchestration for your own AI development**
- Advanced context management for complex AI development projects
- Multi-model coordination for hybrid AI solutions
- Performance optimization through continuous learning
- Scalable infrastructure for distributed AI development
---
## Technical Differentiators
### Beyond Basic AI Tools
CHORUS Services isn't another AI assistant or code completion tool. We've built the infrastructure that makes AI agents actually work together as high-performing development teams.
**Traditional AI Tools:**
- Single-agent interactions
- No persistent team memory
- Manual coordination required
- Limited task complexity
**CHORUS Services:**
- Self-organizing multi-agent teams
- Persistent organizational knowledge
- Autonomous task coordination
- Enterprise-scale complexity handling
### The CHORUS Ecosystem
**Integrated components working in perfect harmony**
- **WHOOSH**: Intelligent workflow orchestration with role-based agent assignment
- **BZZZ**: Peer-to-peer coordination without single points of failure
- **SLURP**: Context management that learns what information matters
- **COOEE**: Continuous feedback loops for system optimization
- **HMMM**: Collaborative reasoning before critical decisions
---
## Proven Results
### Measurable Performance Improvements
**Real metrics from production deployments**
- **92% reduction** in context loss events across development sessions
- **78% reduction** in hallucinated or incorrect AI outputs
- **40% fewer iterations** required for project completion
- **60% reduction** in duplicated work across team members
- **34% faster** overall project delivery times
### Enterprise-Ready Architecture
**Built for scale, security, and reliability**
- Multi-tenant SaaS deployment with enterprise security
- Hybrid cloud/on-premises deployment options
- Role-based access control and complete audit logging
- Integration with existing CI/CD and project management tools
---
## Business Outcomes Focus
### Reduce Development Risk
- Complete transparency in AI decision-making processes
- Audit trails for compliance and quality assurance
- Reduced hallucinations through collaborative verification
- Consistent results through continuous system optimization
### Accelerate Innovation
- Faster iteration cycles through intelligent task orchestration
- Knowledge reuse across projects and teams
- Automatic optimization of development workflows
- Scalable capacity without proportional cost increases
### Maintain Control
- Full visibility into AI agent decision-making
- Configurable guardrails and approval workflows
- Human oversight integration at critical decision points
- Complete solution replay for debugging and improvement
---
## Call to Action
### Primary CTA
**Experience Self-Optimizing AI Development**
*Schedule a live demonstration of autonomous team formation and optimization*
### Secondary CTAs
- **View Technical Architecture** - Deep dive into our breakthrough orchestration platform
- **Download Case Study** - See how CHORUS reduced development time by 40% for enterprise clients
- **Request Private Demo** - See your specific development challenges solved in real-time
---
## Trust Indicators
### Production-Proven Technology
"CHORUS Services isn't experimental - it's deployed and delivering measurable results in production environments today."
### Enterprise Security Standards
- SOC 2 Type II compliant infrastructure
- Enterprise-grade data encryption and access controls
- Complete audit logging and compliance reporting
- Hybrid deployment options for sensitive workloads
### Technical Leadership
Built by the team that solved AI's fundamental context and coordination problems. Our research-to-production pipeline ensures breakthrough innovations reach enterprise customers quickly and reliably.
---
*Ready to transform your development velocity with self-optimizing AI teams?*
**Schedule your demonstration today.**


1
modules/posthuman Submodule

Submodule modules/posthuman added at 2e39cd8664

37
modules/shhh/Dockerfile Normal file

@@ -0,0 +1,37 @@
# SHHH Secrets Sentinel Docker Image
FROM python:3.11-slim
# Set working directory
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
gcc \
libpq-dev \
curl \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements first for better caching
COPY requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY . .
# Create data directories
RUN mkdir -p /data /config /logs
# Set permissions
RUN chmod +x main.py
# Expose API port
EXPOSE 8000
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD curl -f http://localhost:8000/health || exit 1
# Default command (can be overridden)
CMD ["python", "main.py", "--mode", "monitor", "--structured-logs"]


@@ -0,0 +1,319 @@
## Plan: Hybrid Secret Detection with Sanitized Log Replication
### 1. Objective
To implement a robust, two-stage secret detection pipeline that:
1. Reads from a primary hypercore log in real-time.
2. Uses a fast, regex-based scanner for initial detection.
3. Leverages a local LLM (via Ollama) for deeper, context-aware analysis of potential secrets to reduce false positives.
4. Writes a fully sanitized version of the log to a new, parallel "sister" hypercore stream.
5. Quarantines and alerts on confirmed high-severity secrets, ensuring the original log remains untouched for audit purposes while the sanitized log is safe for wider consumption.
### 2. High-Level Architecture & Data Flow
The process will follow this data flow:
```
                             [Primary Hypercore Log]
                                        │
                                        ▼
                           ┌──────────────────────────┐
                           │      HypercoreReader     │
                           └────────────┬─────────────┘
                                        │ (Raw Log Entry)
                                        ▼
                           ┌──────────────────────────┐
                           │     MessageProcessor     │
                           │      (Orchestrator)      │
                           └────────────┬─────────────┘
                                        ▼
                  ┌───────────────────────────────────────────────┐
                  │           Stage 1: Fast Regex Scan            │
                  │               (SecretDetector)                │
                  └───────────────────────┬───────────────────────┘
             ┌────────────────────────────┼────────────────────────────┐
             │ (No Match)                 │ (Potential Match)          │ (High-Confidence Match)
             ▼                            ▼                            ▼
┌──────────────────────────┐ ┌──────────────────────────┐ ┌──────────────────────────┐
│      SanitizedWriter     │ │  Stage 2: LLM Analysis   │ │        (Skip LLM)        │
│  (Writes original entry) │ │      (LLMAnalyzer)       │ │  Quarantine Immediately  │
└──────────────────────────┘ └────────────┬─────────────┘ └────────────┬─────────────┘
             ▲                            │ (LLM Confirms)             │
             │                            ▼                            ▼
             │               ┌──────────────────────────┐ ┌──────────────────────────┐
             │               │     QuarantineManager    │ │      Alerting System     │
             │               │   (DB Storage, Alerts)   │ │        (Webhooks)        │
             │               └────────────┬─────────────┘ └──────────────────────────┘
             │                            │
             │                            ▼
             │               ┌──────────────────────────┐
             └───────────────┤      SanitizedWriter     │
                             │  (Writes REDACTED entry) │
                             └──────────────────────────┘
                                        │
                                        ▼
                             [Sanitized Hypercore Log]
```
### 3. Component Implementation Plan
This plan modifies existing components and adds new ones.
#### 3.1. New Component: `core/llm_analyzer.py`
This new file will contain all logic for interacting with the Ollama instance. This isolates the dependency and makes it easy to test or swap out the LLM backend.
```python
# core/llm_analyzer.py
import requests
import json
class LLMAnalyzer:
"""Analyzes text for secrets using a local LLM via Ollama."""
def __init__(self, endpoint: str, model: str, system_prompt: str):
self.endpoint = endpoint
self.model = model
self.system_prompt = system_prompt
def analyze(self, text: str) -> dict:
"""
Sends text to the Ollama API for analysis and returns a structured JSON response.
Returns:
A dictionary like:
{
"secret_found": bool,
"secret_type": str,
"confidence_score": float,
"severity": str
}
Returns a default "not found" response on error.
"""
prompt = f"Log entry: \"{text}\"\n\nAnalyze this for secrets and respond with only the required JSON."
payload = {
"model": self.model,
"system": self.system_prompt,
"prompt": prompt,
"format": "json",
"stream": False
}
try:
response = requests.post(self.endpoint, json=payload, timeout=15)
response.raise_for_status()
# The response from Ollama is a JSON string, which needs to be parsed.
analysis = json.loads(response.json().get("response", "{}"))
return analysis
except (requests.exceptions.RequestException, json.JSONDecodeError) as e:
print(f"[ERROR] LLMAnalyzer failed: {e}")
# Fallback: If LLM fails, assume no secret was found to avoid blocking the pipeline.
return {"secret_found": False}
```
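Because the Ollama call is isolated here, the Phase 1 unit tests can stub `requests.post` and never need a live model. A minimal pytest sketch (the mocked response shape follows the contract documented in the docstring above):
```python
# tests/test_llm_analyzer.py -- illustrative sketch; assumes pytest and the module layout above
from unittest.mock import MagicMock, patch

from core.llm_analyzer import LLMAnalyzer

def test_analyze_parses_ollama_response():
    fake_response = MagicMock()
    fake_response.raise_for_status.return_value = None
    # Ollama wraps the model's JSON answer in a "response" string field
    fake_response.json.return_value = {
        "response": '{"secret_found": true, "secret_type": "AWS_ACCESS_KEY", '
                    '"confidence_score": 0.91, "severity": "HIGH"}'
    }
    with patch("core.llm_analyzer.requests.post", return_value=fake_response):
        analyzer = LLMAnalyzer("http://localhost:11434/api/generate", "llama3", "system prompt")
        result = analyzer.analyze('aws_key = "AKIA..."')
    assert result["secret_found"] is True
    assert result["severity"] == "HIGH"
```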
#### 3.2. New Component: `core/sanitized_writer.py`
This component is responsible for writing to the new, sanitized hypercore log. This abstraction allows us to easily change the output destination in the future.
```python
# core/sanitized_writer.py
class SanitizedWriter:
"""Writes log entries to the sanitized sister hypercore log."""
def __init__(self, sanitized_log_path: str):
self.log_path = sanitized_log_path
# Placeholder for hypercore writing logic. For now, we'll append to a file.
self.log_file = open(self.log_path, "a")
def write(self, log_entry: str):
"""Writes a single log entry to the sanitized stream."""
self.log_file.write(log_entry + "\n")
self.log_file.flush()
def close(self):
self.log_file.close()
```
#### 3.3. Modify: `core/detector.py`
We will enhance the `SecretDetector` to not only find matches but also redact them.
```python
# core/detector.py
import re
class SecretDetector:
    def __init__(self, patterns_file: str = "patterns.yaml"):
        # ... (load_patterns remains the same) ...
        self.patterns = self.load_patterns(patterns_file)
def scan(self, text: str) -> list[dict]:
"""Scans text and returns a list of found secrets with metadata."""
matches = []
for pattern_name, pattern in self.patterns.items():
if pattern.get("active", True):
regex_match = re.search(pattern["regex"], text)
if regex_match:
matches.append({
"secret_type": pattern_name,
"value": regex_match.group(0),
"confidence": pattern.get("confidence", 0.8), # Default confidence
"severity": pattern.get("severity", "MEDIUM")
})
return matches
    def redact(self, text: str, secret_value: str) -> str:
        """Redacts a specific secret value within a string."""
        if len(secret_value) <= 8:
            # Too short to safely show any characters without revealing the secret
            return text.replace(secret_value, "[REDACTED]")
        redacted_str = secret_value[:4] + "****" + secret_value[-4:]
        return text.replace(secret_value, f"[REDACTED:{redacted_str}]")
```
#### 3.4. Modify: `pipeline/processor.py`
This is the orchestrator and will see the most significant changes to implement the hybrid logic.
```python
# pipeline/processor.py
from core.hypercore_reader import HypercoreReader
from core.detector import SecretDetector
from core.llm_analyzer import LLMAnalyzer
from core.quarantine import QuarantineManager
from core.sanitized_writer import SanitizedWriter
class MessageProcessor:
def __init__(self, reader: HypercoreReader, detector: SecretDetector, llm_analyzer: LLMAnalyzer, quarantine: QuarantineManager, writer: SanitizedWriter, llm_threshold: float):
self.reader = reader
self.detector = detector
self.llm_analyzer = llm_analyzer
self.quarantine = quarantine
self.writer = writer
self.llm_threshold = llm_threshold # e.g., 0.90
async def process_stream(self):
"""Main processing loop for the hybrid detection model."""
async for entry in self.reader.stream_entries():
# Stage 1: Fast Regex Scan
regex_matches = self.detector.scan(entry.content)
if not regex_matches:
# No secrets found, write original entry to sanitized log
self.writer.write(entry.content)
continue
# A potential secret was found. Default to sanitized, but may be quarantined.
sanitized_content = entry.content
should_quarantine = False
confirmed_secret = None
for match in regex_matches:
# High-confidence regex matches trigger immediate quarantine, skipping LLM.
if match['confidence'] >= self.llm_threshold:
should_quarantine = True
confirmed_secret = match
break # One high-confidence match is enough
# Stage 2: Low-confidence matches go to LLM for verification.
llm_result = self.llm_analyzer.analyze(entry.content)
if llm_result.get("secret_found"):
should_quarantine = True
# Prefer LLM's classification but use regex value for redaction
confirmed_secret = {
"secret_type": llm_result.get("secret_type", match['secret_type']),
"value": match['value'],
"severity": llm_result.get("severity", match['severity'])
}
break
if should_quarantine and confirmed_secret:
# A secret is confirmed. Redact, quarantine, and alert.
sanitized_content = self.detector.redact(entry.content, confirmed_secret['value'])
self.quarantine.quarantine_message(
message=entry,
secret_type=confirmed_secret['secret_type'],
severity=confirmed_secret['severity'],
redacted_content=sanitized_content
)
# Potentially trigger alerts here as well
print(f"[ALERT] Confirmed secret {confirmed_secret['secret_type']} found and quarantined.")
# Write the (potentially redacted) content to the sanitized log
self.writer.write(sanitized_content)
```
#### 3.5. Modify: `main.py`
The main entry point will be updated to instantiate and wire together the new and modified components.
```python
# main.py
# ... imports ...
import asyncio
from core.hypercore_reader import HypercoreReader
from core.detector import SecretDetector
from core.llm_analyzer import LLMAnalyzer
from core.quarantine import QuarantineManager
from core.sanitized_writer import SanitizedWriter
# ... other imports
def main():
# 1. Configuration
# Load from a new config.yaml or environment variables
PRIMARY_LOG_PATH = "/path/to/primary/hypercore.log"
SANITIZED_LOG_PATH = "/path/to/sanitized/hypercore.log"
PATTERNS_PATH = "patterns.yaml"
DB_CONNECTION = "..."
OLLAMA_ENDPOINT = "http://localhost:11434/api/generate"
OLLAMA_MODEL = "llama3"
LLM_CONFIDENCE_THRESHOLD = 0.90 # Regex confidence >= this skips LLM
with open("SHHH_SECRETS_SENTINEL_AGENT_PROMPT.md", "r") as f:
OLLAMA_SYSTEM_PROMPT = f.read()
# 2. Instantiation
reader = HypercoreReader(PRIMARY_LOG_PATH)
detector = SecretDetector(PATTERNS_PATH)
llm_analyzer = LLMAnalyzer(OLLAMA_ENDPOINT, OLLAMA_MODEL, OLLAMA_SYSTEM_PROMPT)
quarantine = QuarantineManager(DB_CONNECTION)
writer = SanitizedWriter(SANITIZED_LOG_PATH)
processor = MessageProcessor(
reader=reader,
detector=detector,
llm_analyzer=llm_analyzer,
quarantine=quarantine,
writer=writer,
llm_threshold=LLM_CONFIDENCE_THRESHOLD
)
# 3. Execution
print("Starting SHHH Hypercore Monitor...")
try:
asyncio.run(processor.process_stream())
except KeyboardInterrupt:
print("Shutting down...")
finally:
writer.close()
if __name__ == "__main__":
main()
```
### 4. Phased Rollout
1. **Phase 1: Component Implementation (1-2 days)**
* Create `core/llm_analyzer.py` and `core/sanitized_writer.py`.
* Write unit tests for both new components. Mock the `requests` calls for the analyzer.
* Update `core/detector.py` with the `redact` method and update its unit tests.
2. **Phase 2: Orchestration Logic (2-3 days)**
* Implement the new logic in `pipeline/processor.py`.
* Write integration tests for the processor that simulate the full flow: no match, low-confidence match (with mocked LLM response), and high-confidence match.
* Update `main.py` to wire everything together.
3. **Phase 3: Configuration & Testing (1 day)**
* Add a `config.yaml` to manage all paths, thresholds, and endpoints.
* Perform an end-to-end test run with a sample log file and a running Ollama instance.
* Verify that the primary log is untouched, the sanitized log is created correctly (with and without redactions), and the quarantine database is populated as expected.
### 5. Success Criteria
* **Zero Leaks:** The sanitized log stream contains no secrets.
* **High Accuracy:** False positive rate is demonstrably lower than a regex-only solution, verified during testing.
* **Performance:** The pipeline maintains acceptable latency (<200ms per log entry on average, accounting for occasional LLM analysis).
* **Auditability:** The primary log remains a perfect, unaltered source of truth. All detection and quarantine events are logged in the PostgreSQL database.

561
modules/shhh/README.md Normal file

@@ -0,0 +1,561 @@
🔥 Excellent, let's push this all the way into a **production-grade spec**.
---
## 📂 **1⃣ Feedback Ingestion Spec**
This defines how curators/humans give feedback to the Sentinel so it can **update its detection rules (patterns.yaml)** safely.
---
### 🔄 **Feedback Flow**
1. **Curator/Reviewer sees alert** → marks it as:
* `false_positive` (regex over-triggered)
* `missed_secret` (regex failed to detect)
* `uncertain` (needs better regex refinement)
2. **Feedback API** ingests the report:
```json
{
"alert_id": "log_345",
"secret_type": "AWS_ACCESS_KEY",
"feedback_type": "false_positive",
"evidence": "Key was dummy data: TESTKEY123",
"suggested_regex_fix": null
}
```
3. **Meta-Learner** updates rules:
* `false_positive` → adds **exceptions** (e.g., allowlist prefixes like `TESTKEY`).
* `missed_secret` → drafts **new regex** from evidence (using regex generator or LLM).
* Writes changes to **patterns.yaml** under `pending_review`.
4. **Security admin approves** before the new regex is marked `active: true`.
---
### 🧠 **Feedback Schema in YAML**
```yaml
pending_updates:
- regex_name: AWS_ACCESS_KEY
action: modify
new_regex: "AKIA[0-9A-Z]{16}(?!TESTKEY)"
confidence: 0.82
status: "pending human review"
submitted_by: curator_2
timestamp: 2025-08-02T12:40:00Z
```
✅ This keeps **audit trails** & allows **safe hot updates**.
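A minimal sketch of how the Meta-Learner could append such an entry (the `pending_updates` key and fields follow the schema above; the function name is illustrative):
```python
# meta_learner.py -- illustrative writer for pending regex updates
import yaml
from datetime import datetime, timezone

def queue_pattern_update(patterns_path: str, regex_name: str, new_regex: str,
                         confidence: float, submitted_by: str) -> None:
    """Append a proposed rule change under pending_updates; never activates it."""
    with open(patterns_path, "r") as f:
        doc = yaml.safe_load(f) or {}
    doc.setdefault("pending_updates", []).append({
        "regex_name": regex_name,
        "action": "modify",
        "new_regex": new_regex,
        "confidence": confidence,
        "status": "pending human review",  # a security admin must approve
        "submitted_by": submitted_by,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    })
    with open(patterns_path, "w") as f:
        yaml.safe_dump(doc, f, sort_keys=False)
```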
---
## ⚙️ **2⃣ Real AWS/GitHub Webhook Payload Templates**
These are **example POST payloads** your Sentinel would send when it detects a leaked secret.
---
### 🔐 **AWS Access Key Revocation**
**Endpoint:**
`POST https://security.example.com/hooks/aws-revoke`
**Payload:**
```json
{
"event": "secret_leak_detected",
"secret_type": "AWS_ACCESS_KEY",
"redacted_key": "AKIA****XYZ",
"log_reference": "hyperlog:58321",
"recommended_action": "Revoke IAM access key immediately",
"severity": "HIGH",
"timestamp": "2025-08-02T12:45:00Z"
}
```
➡ Your security automation would call AWS CLI or IAM API:
```bash
aws iam update-access-key --access-key-id <redacted> --status Inactive
aws iam delete-access-key --access-key-id <redacted>
```
---
### 🐙 **GitHub Token Revocation**
**Endpoint:**
`POST https://security.example.com/hooks/github-revoke`
**Payload:**
```json
{
"event": "secret_leak_detected",
"secret_type": "GITHUB_TOKEN",
"redacted_key": "ghp_****abcd",
"repository": "repo-name",
"log_reference": "hyperlog:58322",
"severity": "HIGH",
"recommended_action": "Invalidate GitHub token via API",
"timestamp": "2025-08-02T12:46:00Z"
}
```
➡ This would tie into GitHub's [token-scanning API](https://docs.github.com/en/developers/overview/secret-scanning) or use PAT revocation.
---
### 💬 **Slack Token Revocation**
**Endpoint:**
`POST https://security.example.com/hooks/slack-revoke`
**Payload:**
```json
{
"event": "secret_leak_detected",
"secret_type": "SLACK_TOKEN",
"redacted_key": "xoxb****hjk",
"workspace": "company-slack",
"log_reference": "hyperlog:58323",
"severity": "HIGH",
"recommended_action": "Revoke Slack bot/user token",
"timestamp": "2025-08-02T12:47:00Z"
}
```
➡ Slack Admin API can be used to **revoke** or **rotate**.
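On the receiving side, each hook is just an authenticated POST handler. A minimal sketch of one receiver (FastAPI assumed; the field names match the payloads above, and the revocation call itself is left as a comment):
```python
# hooks_server.py -- illustrative receiver for the revocation webhooks
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class LeakEvent(BaseModel):
    event: str
    secret_type: str
    redacted_key: str
    log_reference: str
    severity: str
    recommended_action: str
    timestamp: str

@app.post("/hooks/aws-revoke")
def aws_revoke(evt: LeakEvent):
    # Here the automation would call the IAM API, e.g.
    # `aws iam update-access-key --status Inactive` as shown above.
    print(f"[REVOKE] {evt.secret_type} {evt.redacted_key} severity={evt.severity}")
    return {"status": "accepted"}
```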
---
## 📡 **3⃣ Redis or PostgreSQL Quarantine Store**
Switching from memory to **persistent storage** means quarantined logs survive restarts.
---
### ✅ **Redis Option (Fast, Volatile)**
```python
import redis, json
from datetime import datetime

r = redis.Redis(host='localhost', port=6379, decode_responses=True)
def quarantine_log(log_line, reason):
entry = {"timestamp": datetime.utcnow().isoformat() + "Z", "reason": reason, "log_line": log_line}
r.lpush("quarantine", json.dumps(entry))
print(f"[QUARANTINE] Stored in Redis: {reason}")
```
* 🏎 **Pros:** Fast, easy to scale.
* ⚠️ **Cons:** Volatile unless persisted (RDB/AOF).
---
### ✅ **PostgreSQL Option (Auditable, Durable)**
**Schema:**
```sql
CREATE TABLE quarantine (
id SERIAL PRIMARY KEY,
timestamp TIMESTAMPTZ NOT NULL,
reason TEXT NOT NULL,
log_line TEXT NOT NULL,
reviewed BOOLEAN DEFAULT FALSE
);
```
**Python Insert:**
```python
import psycopg2
from datetime import datetime

conn = psycopg2.connect("dbname=sentinel user=postgres password=secret")
cursor = conn.cursor()
def quarantine_log(log_line, reason):
entry_time = datetime.utcnow().isoformat() + "Z"
cursor.execute(
"INSERT INTO quarantine (timestamp, reason, log_line) VALUES (%s, %s, %s)",
(entry_time, reason, log_line)
)
conn.commit()
print(f"[QUARANTINE] Stored in PostgreSQL: {reason}")
```
**Postgres is better for long-term auditing** — you can run reports like:
* “How many AWS keys leaked this month?”
* “Which agents generated the most HIGH-severity quarantines?”
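Those reports map directly onto the table; for example (this schema stores the secret type inside `reason`, as the insert helper above does):
```sql
-- “How many AWS keys leaked this month?”
SELECT count(*)
FROM quarantine
WHERE reason LIKE '%AWS_ACCESS_KEY%'
  AND timestamp >= date_trunc('month', now());
```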
---
We now have:

* ✅ **Detection → Redaction → Quarantine → Revocation → Feedback → Pattern Evolution**
* ✅ **patterns.yaml** for versioned regex
* ✅ **Webhooks** for real-time secret revocation
* ✅ **Persistent quarantine store** (Redis or Postgres)
---
## 📜 **1⃣ Migration Script: Redis → PostgreSQL**
This script will migrate existing quarantined log entries from **Redis** to **Postgres**.
```python
import redis, json, psycopg2
from datetime import datetime
# Redis config
r = redis.Redis(host='localhost', port=6379, decode_responses=True)
# Postgres config
conn = psycopg2.connect("dbname=sentinel user=postgres password=secret")
cursor = conn.cursor()
def migrate_quarantine():
count = 0
while True:
entry_json = r.rpop("quarantine") # pop oldest entry from Redis
if not entry_json:
break
entry = json.loads(entry_json)
cursor.execute(
"INSERT INTO quarantine (timestamp, reason, log_line) VALUES (%s, %s, %s)",
(entry["timestamp"], entry["reason"], entry["log_line"])
)
count += 1
conn.commit()
print(f"[MIGRATION] Moved {count} quarantined entries from Redis → PostgreSQL")
if __name__ == "__main__":
migrate_quarantine()
```
**Run once** after Postgres is set up — empties Redis queue into the durable DB.
---
## 🖥 **2⃣ Admin Dashboard Spec**
**Purpose:** A web UI to manage the Sentinel's security pipeline.
---
### 🎯 **Core Features**
**Quarantine Browser**
* Paginated view of all quarantined logs
* Search/filter by `secret_type`, `source_agent`, `date`, `status`
* Mark quarantined logs as **reviewed** or **false alarm**
**Regex Rules Manager**
* Lists all regexes from `patterns.yaml`
* Add / update / deactivate rules via UI
* Shows `pending_updates` flagged by the Meta-Learner for human approval
**Revocation Status Board**
* See which secrets triggered revocations
* Status of revocation hooks (success/fail)
**Metrics Dashboard**
* Charts: “Secrets Detected Over Time”, “Top Sources of Leaks”
* KPIs: # HIGH severity secrets this month, # rules updated, # false positives
---
### 🏗 **Tech Stack Suggestion**
* **Backend:** FastAPI (Python)
* **Frontend:** React + Tailwind
* **DB:** PostgreSQL for quarantine + rules history
* **Auth:** OAuth (GitHub/Google) + RBAC (only security admins can approve regex changes)
---
### 🔌 **Endpoints**
```
GET /api/quarantine → list quarantined entries
POST /api/quarantine/review → mark entry as reviewed
GET /api/rules → list regex patterns
POST /api/rules/update → update or add a regex
GET /api/revocations → list revocation events
```
---
### 🖥 **Mock Dashboard Layout**
* **Left Nav:** Quarantine | Rules | Revocations | Metrics
* **Main Panel:**
* Data tables with sorting/filtering
* Inline editors for regex rules
* Approve/Reject buttons for pending regex updates
✅ Basically a **security control room** for Sentinel.
---
## 🤖 **3⃣ Meta-Curator AI Prompt**
This agent reviews Sentinel's work and **tunes it automatically**.
---
### **Meta-Curator: System Prompt**
> **Role & Mission:**
> You are the **Meta-Curator**, a supervisory AI responsible for reviewing the **Secrets Sentinel's** detections, regex updates, and feedback reports.
>
> **Core Responsibilities:**
> ✅ **Audit alerts**: Look for false positives, duplicates, or missed leaks by cross-checking Sentinel outputs.
> ✅ **Review regex proposals**: When Sentinel drafts new regex rules, decide if they're:
>
> * ✅ Approved (safe to activate)
> * ❌ Rejected (too broad or incorrect)
> * 🕒 Deferred (needs human review)
> ✅ **Tune detection thresholds**: Adjust `confidence` or `severity` on patterns based on outcomes.
> ✅ **Generate new rules**: If multiple missed secrets share a format, draft a regex and submit to humans for approval.
> ✅ **Report upstream**: Summarize changes to security admins weekly.
---
### **Behavior Guidelines**
* **Conservative by default:** Don't auto-approve regexes unless confidence > 0.95.
* **Keep auditability:** Every decision (approve/reject) is logged in the hyperlog.
* **Respect human overrides:** Never overwrite a regex that a human explicitly locked.
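These guidelines collapse into a small decision gate; a sketch (the 0.95 threshold is the one above, field names illustrative):
```python
# curator_gate.py -- illustrative approval gate for drafted regex rules
def decide(proposal: dict, locked_by_human: bool) -> str:
    """Return 'approve', 'reject', or 'defer' for a regex proposal."""
    if locked_by_human:
        return "defer"  # respect human overrides: never touch a locked rule
    if proposal.get("too_broad", False):
        return "reject"
    if proposal.get("confidence", 0.0) > 0.95:
        return "approve"  # conservative auto-approval threshold
    return "defer"  # everything else waits for human review
```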
---
### **Example Meta-Curator Output**
```json
{
"action": "approve_regex",
"regex_name": "GITLAB_TOKEN",
"regex_pattern": "glpat-[0-9A-Za-z\\-_]{20}",
"confidence": 0.97,
"decision_reason": "Validated against 12 quarantined examples, no false positives found.",
"timestamp": "2025-08-02T13:45:00Z"
}
```
✅ This meta-agent is the **brains of the rules layer** — keeps Sentinel evolving, but under control.
---
## 🚀 **Now You Have:**
* ✅ **Migration Path** → Redis → PostgreSQL
* ✅ **Admin Dashboard Spec** → complete with endpoints & layout
* ✅ **Meta-Curator Prompt** → the agent that “manages the manager”

Alright, here's the next batch to lock this into a **real, buildable system**.
---
## 📂 **1⃣ `patterns_history` Table Schema**
This tracks **every regex change** ever made — who/what made it, why, and when.
```sql
CREATE TABLE patterns_history (
id SERIAL PRIMARY KEY,
regex_name TEXT NOT NULL,
old_regex TEXT,
new_regex TEXT,
action TEXT CHECK (action IN ('add', 'update', 'remove')),
confidence NUMERIC(3,2),
status TEXT CHECK (status IN ('approved', 'pending', 'rejected')),
submitted_by TEXT NOT NULL,
approved_by TEXT,
decision_reason TEXT,
timestamp TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);
```
### ✅ What this gives you:
* **Full audit trail** (critical for security compliance).
* You can run queries like:
* *“Show all regex changes made by Meta-Curator vs. humans.”*
* *“List all rules rejected in the last 90 days.”*
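Both example queries map onto the schema directly:
```sql
-- Regex changes made by Meta-Curator vs. humans
SELECT submitted_by, count(*) FROM patterns_history GROUP BY submitted_by;

-- Rules rejected in the last 90 days
SELECT regex_name, decision_reason, timestamp
FROM patterns_history
WHERE status = 'rejected'
  AND timestamp >= now() - INTERVAL '90 days';
```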
---
## 🖼 **2⃣ Admin Dashboard Wireframes**
**Goal:** show your devs exactly what to build — no ambiguity.
---
### **🔒 Dashboard Home**
```
------------------------------------------------------
| [Sentinel Logo] Secrets Sentinel Dashboard |
------------------------------------------------------
| Quarantine | Rules | Revocations | Metrics | Admin |
------------------------------------------------------
| Welcome back, Security Admin! |
| |
| ▢ 32 Quarantined logs waiting review |
| ▢ 4 Pending regex updates |
| ▢ 2 Failed revocation hooks |
------------------------------------------------------
```
---
### **🗄 Quarantine View**
```
------------------------------------------------------
| Quarantine Logs |
------------------------------------------------------
| Search: [______________] [Filter ▼] |
------------------------------------------------------
| Log ID | Secret Type | Severity | Status |
------------------------------------------------------
| log_4287 | AWS_ACCESS_KEY| HIGH | PENDING |
| log_4288 | JWT | MEDIUM | REVIEWED|
| log_4289 | SSH_KEY | HIGH | PENDING |
------------------------------------------------------
[ View Details ] [ Mark as Reviewed ] [ Delete ]
```
**Clicking “View Details” →** shows full log snippet (with redacted secret).
---
### **📜 Regex Manager**
```
------------------------------------------------------
| Regex Rules |
------------------------------------------------------
| Name | Regex Pattern | Active |
------------------------------------------------------
| AWS_ACCESS_KEY | AKIA[0-9A-Z]{16} | ✔ |
| JWT | eyJ[A-Za-z0-9_-]+?\.[…] | ✔ |
| SLACK_TOKEN | xox[baprs]-[0-9A-Za-z-]{10,48} | ✔ |
------------------------------------------------------
[ Add New Regex ] [ View History ]
```
Clicking **View History** → pulls from `patterns_history`.
---
### **📊 Metrics View**
* **Line Chart:** “Secrets Detected Over Time”
* **Bar Chart:** “Secrets by Type” (AWS, GitHub, JWT, etc.)
* **KPIs:**
* 🔴 High Severity Leaks: 12 this week
* 🟢 Regex Accuracy: 94%
---
## ⚙️ **3⃣ FastAPI Skeleton**
Heres the **starter code** for your dev team to run with.
```python
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
import psycopg2, yaml
app = FastAPI(title="Secrets Sentinel Dashboard API")
# --- Database Setup ---
conn = psycopg2.connect("dbname=sentinel user=postgres password=secret")
cursor = conn.cursor()
# --- Models ---
class QuarantineEntry(BaseModel):
id: int
timestamp: str
reason: str
log_line: str
reviewed: bool
class RegexRule(BaseModel):
regex_name: str
regex_pattern: str
severity: str
confidence: float
active: bool
# --- Endpoints ---
@app.get("/quarantine", response_model=List[QuarantineEntry])
def get_quarantine():
cursor.execute("SELECT id, timestamp, reason, log_line, reviewed FROM quarantine")
rows = cursor.fetchall()
return [QuarantineEntry(id=r[0], timestamp=str(r[1]), reason=r[2], log_line=r[3], reviewed=r[4]) for r in rows]
@app.post("/quarantine/review/{entry_id}")
def review_quarantine(entry_id: int):
cursor.execute("UPDATE quarantine SET reviewed=true WHERE id=%s", (entry_id,))
conn.commit()
return {"status": "ok", "message": f"Quarantine entry {entry_id} marked reviewed"}
@app.get("/rules", response_model=List[RegexRule])
def get_rules():
    # Load active rules from patterns.yaml (YAML, so parse with yaml.safe_load)
    with open("patterns.yaml", "r") as f:
        patterns = yaml.safe_load(f) or {}
rules = []
for name, rule in patterns.get("patterns", {}).items():
rules.append(RegexRule(
regex_name=name,
regex_pattern=rule["regex"],
severity=rule["severity"],
confidence=rule["confidence"],
active=rule["active"]
))
return rules
@app.post("/rules/update")
def update_rule(rule: RegexRule):
# Append to patterns_history table
cursor.execute("""
INSERT INTO patterns_history (regex_name, old_regex, new_regex, action, confidence, status, submitted_by)
VALUES (%s, %s, %s, 'update', %s, 'pending', 'admin')
""", (rule.regex_name, None, rule.regex_pattern, rule.confidence))
conn.commit()
return {"status": "ok", "message": f"Regex {rule.regex_name} queued for update"}
```
**Why this skeleton works:**
* REST endpoints for **Quarantine**, **Rules**, **History**.
* Uses **Postgres for persistence**.
* Reads from `patterns.yaml` for active rules.
---
## 🚀 **Now You Have:**
✅ A **Postgres schema** for regex change history.
✅ **Wireframes** for the admin dashboard.
✅ A **FastAPI skeleton** your team can expand into a full API/UI stack.


@@ -0,0 +1,512 @@
# 🔒 SHHH Hypercore Log Monitor - Implementation Plan
## Executive Summary
This plan outlines the creation of a Python application that monitors our hypercore log to ensure no secrets are leaked in BZZZ messages, based on the SHHH module's secrets detection framework.
## Project Overview
### Objective
Create a real-time monitoring system that:
- Monitors hypercore log entries for secret patterns
- Detects potential secrets in BZZZ P2P messages before they propagate
- Quarantines suspicious entries and triggers automatic remediation
- Provides audit trails and security dashboard for compliance
### Architecture Integration
- **Hypercore Log**: Source of truth for all CHORUS Services events
- **BZZZ Network**: P2P messaging layer that could inadvertently transmit secrets
- **SHHH Module**: Existing secrets detection framework and patterns
- **Monitoring App**: New Python application bridging these systems
## Technical Requirements
### 1. Hypercore Log Integration
```python
# Real-time log monitoring:
# - Stream hypercore entries as they're written
# - Parse BZZZ message payloads for secret patterns
# - Filter for message types that could contain secrets
# - Handle log rotation and recovery scenarios
```
### 2. Secret Detection Engine
Based on SHHH's `patterns.yaml` framework:
```yaml
patterns:
AWS_ACCESS_KEY:
regex: "AKIA[0-9A-Z]{16}"
severity: "HIGH"
confidence: 0.95
active: true
GITHUB_TOKEN:
regex: "ghp_[0-9A-Za-z]{36}"
severity: "HIGH"
confidence: 0.92
active: true
PRIVATE_KEY:
regex: "-----BEGIN [A-Z ]*PRIVATE KEY-----"
severity: "CRITICAL"
confidence: 0.98
active: true
```
### 3. Quarantine & Response System
- **Immediate**: Block message propagation in BZZZ network
- **Log**: Store quarantined entries in PostgreSQL
- **Alert**: Notify security team via webhooks
- **Revoke**: Trigger automatic secret revocation APIs
## Implementation Architecture
### Phase 1: Core Monitoring System (Weeks 1-2)
#### 1.1 Hypercore Log Reader
```python
# /shhh-monitor/core/hypercore_reader.py
class HypercoreReader:
def __init__(self, log_path: str):
self.log_path = log_path
self.position = 0
def stream_entries(self) -> Iterator[LogEntry]:
"""Stream new hypercore entries in real-time"""
# Tail-like functionality with inotify
# Parse hypercore binary format
# Yield structured LogEntry objects
def parse_bzzz_message(self, entry: LogEntry) -> Optional[BzzzMessage]:
"""Extract BZZZ message payload from hypercore entry"""
# Decode BZZZ message format
# Extract message content and metadata
# Return structured message or None
```
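A polling fallback gives the flavour of `stream_entries` without the inotify or hypercore-format details (a sketch only; the real reader must parse the binary format and track `self.position`):
```python
# Simplified tail-style follower -- a sketch of the stream_entries behaviour
import time

def tail_lines(path: str, poll_interval: float = 0.2):
    """Yield lines appended to `path` after we start, like `tail -f`."""
    with open(path, "r") as f:
        f.seek(0, 2)  # jump to end of file; only new entries are of interest
        while True:
            line = f.readline()
            if line:
                yield line.rstrip("\n")
            else:
                time.sleep(poll_interval)  # nothing new yet; back off briefly
```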
#### 1.2 Secret Detection Engine
```python
# /shhh-monitor/core/detector.py
class SecretDetector:
def __init__(self, patterns_file: str = "patterns.yaml"):
self.patterns = self.load_patterns(patterns_file)
def scan_message(self, message: BzzzMessage) -> List[SecretMatch]:
"""Scan BZZZ message for secret patterns"""
matches = []
for pattern_name, pattern in self.patterns.items():
if pattern["active"]:
matches.extend(self.apply_regex(message, pattern))
return matches
def redact_secret(self, text: str, match: SecretMatch) -> str:
"""Redact detected secret while preserving context"""
# Replace secret with asterisks, keep first/last chars
# Maintain log readability for analysis
```
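`load_patterns` is referenced but not shown; a sketch against the `patterns.yaml` format above:
```python
# Sketch of the load_patterns helper, matching the patterns.yaml format above
import yaml

def load_patterns(patterns_file: str = "patterns.yaml") -> dict:
    """Return the pattern map, keeping only rules marked active."""
    with open(patterns_file, "r") as f:
        doc = yaml.safe_load(f) or {}
    return {name: p for name, p in doc.get("patterns", {}).items()
            if p.get("active", True)}
```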
#### 1.3 Quarantine System
```python
# /shhh-monitor/core/quarantine.py
class QuarantineManager:
def __init__(self, db_connection: str):
self.db = psycopg2.connect(db_connection)
def quarantine_message(self, message: BzzzMessage, matches: List[SecretMatch]):
"""Store quarantined message and block propagation"""
# Insert into quarantine table
# Generate alert payload
# Trigger BZZZ network block
def send_alert(self, severity: str, secret_type: str, redacted_content: str):
"""Send webhook alerts for detected secrets"""
# POST to security webhook endpoints
# Different payloads for AWS, GitHub, Slack tokens
# Include revocation recommendations
```
### Phase 2: BZZZ Network Integration (Weeks 3-4)
#### 2.1 BZZZ Message Interceptor
```python
# /shhh-monitor/integrations/bzzz_interceptor.py
class BzzzInterceptor:
def __init__(self, bzzz_config: Dict):
self.bzzz_client = BzzzClient(bzzz_config)
def install_message_hook(self):
"""Install pre-send hook in BZZZ network layer"""
# Intercept messages before P2P transmission
# Scan with SecretDetector
# Block or allow message propagation
def block_message(self, message_id: str, reason: str):
"""Prevent message from propagating in P2P network"""
# Mark message as blocked in BZZZ
# Log blocking reason
# Notify sender agent of security violation
```
#### 2.2 Real-time Processing Pipeline
```python
# /shhh-monitor/pipeline/processor.py
class MessageProcessor:
    def __init__(self, reader: HypercoreReader, detector: SecretDetector, quarantine: QuarantineManager):
        self.hypercore_reader = reader
        self.detector = detector
        self.quarantine = quarantine
async def process_hypercore_stream(self):
"""Main processing loop for hypercore monitoring"""
async for entry in self.hypercore_reader.stream_entries():
            if bzzz_message := self.hypercore_reader.parse_bzzz_message(entry):
matches = self.detector.scan_message(bzzz_message)
if matches:
await self.handle_secret_detection(bzzz_message, matches)
async def handle_secret_detection(self, message: BzzzMessage, matches: List[SecretMatch]):
"""Handle detected secrets with appropriate response"""
# Determine severity level
# Quarantine message
# Send alerts
# Trigger revocation if needed
# Update detection statistics
```
### Phase 3: Admin Dashboard & Feedback Loop (Weeks 5-6)
#### 3.1 FastAPI Backend
```python
# /shhh-monitor/api/main.py
from fastapi import FastAPI, Depends
from .models import QuarantineEntry, SecretPattern, RevocationEvent
app = FastAPI(title="SHHH Hypercore Monitor API")
@app.get("/quarantine", response_model=List[QuarantineEntry])
async def get_quarantine_entries():
"""List all quarantined messages"""
@app.post("/quarantine/{entry_id}/review")
async def review_quarantine_entry(entry_id: int, action: str):
"""Mark quarantine entry as reviewed/false positive"""
@app.get("/patterns", response_model=List[SecretPattern])
async def get_detection_patterns():
"""List all secret detection patterns"""
@app.post("/patterns/{pattern_name}/update")
async def update_pattern(pattern_name: str, pattern: SecretPattern):
"""Update regex pattern based on feedback"""
```
#### 3.2 React Dashboard Frontend
```typescript
// /shhh-monitor/dashboard/src/components/QuarantineDashboard.tsx
interface QuarantineDashboard {
// Real-time quarantine feed
// Pattern management interface
// Revocation status tracking
// Security metrics and charts
// Alert configuration
}
```
### Phase 4: Automated Response & Learning (Weeks 7-8)
#### 4.1 Automated Secret Revocation
```python
# /shhh-monitor/automation/revocation.py
class SecretRevoker:
def __init__(self):
self.aws_client = boto3.client('iam')
self.github_client = github.Github()
self.slack_client = slack.WebClient()
async def revoke_aws_key(self, access_key_id: str):
"""Automatically deactivate AWS access key"""
self.aws_client.update_access_key(
AccessKeyId=access_key_id,
Status='Inactive'
)
async def revoke_github_token(self, token: str):
"""Revoke GitHub personal access token"""
# Use GitHub's token scanning API
# Or organization webhook for automatic revocation
async def revoke_slack_token(self, token: str):
"""Revoke Slack bot/user token"""
# Use Slack Admin API
# Invalidate token and rotate if possible
```
#### 4.2 Meta-Learning System
```python
# /shhh-monitor/learning/meta_curator.py
class MetaCurator:
def __init__(self, llm_client):
self.llm = llm_client
async def analyze_false_positives(self, entries: List[QuarantineEntry]):
"""Use LLM to improve regex patterns"""
# Analyze patterns in false positives
# Generate regex refinements
# Submit for human approval
async def detect_new_secret_types(self, quarantine_history: List[QuarantineEntry]):
"""Identify new types of secrets to detect"""
# Look for patterns in undetected secrets
# Generate new regex proposals
# Calculate confidence scores
```
## Database Schema
### Core Tables
```sql
-- Quarantined messages
CREATE TABLE quarantine (
id SERIAL PRIMARY KEY,
timestamp TIMESTAMPTZ NOT NULL,
hypercore_position BIGINT NOT NULL,
bzzz_message_id TEXT NOT NULL,
secret_type TEXT NOT NULL,
severity TEXT CHECK (severity IN ('LOW', 'MEDIUM', 'HIGH', 'CRITICAL')),
confidence NUMERIC(3,2),
redacted_content TEXT NOT NULL,
full_content_hash TEXT NOT NULL, -- For audit purposes
reviewed BOOLEAN DEFAULT FALSE,
review_action TEXT, -- 'false_positive', 'confirmed', 'uncertain'
reviewer TEXT,
review_timestamp TIMESTAMPTZ
);
-- Pattern history and evolution
CREATE TABLE patterns_history (
id SERIAL PRIMARY KEY,
pattern_name TEXT NOT NULL,
old_regex TEXT,
new_regex TEXT,
action TEXT CHECK (action IN ('add', 'update', 'remove')),
confidence NUMERIC(3,2),
status TEXT CHECK (status IN ('approved', 'pending', 'rejected')),
submitted_by TEXT NOT NULL, -- 'human', 'meta_curator', 'feedback_system'
approved_by TEXT,
decision_reason TEXT,
timestamp TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);
-- Revocation events tracking
CREATE TABLE revocations (
id SERIAL PRIMARY KEY,
quarantine_id INTEGER REFERENCES quarantine(id),
secret_type TEXT NOT NULL,
revocation_method TEXT NOT NULL, -- 'aws_api', 'github_api', 'manual'
status TEXT CHECK (status IN ('success', 'failed', 'pending')),
response_data JSONB, -- API response details
timestamp TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);
-- Performance metrics
CREATE TABLE detection_metrics (
id SERIAL PRIMARY KEY,
date DATE NOT NULL,
total_messages_scanned INTEGER,
secrets_detected INTEGER,
false_positives INTEGER,
patterns_updated INTEGER,
avg_detection_latency_ms INTEGER
);
```
## Security Considerations
### 1. Secure Secret Storage
- **Never store actual secrets** in quarantine database
- Use **cryptographic hashes** for audit trails
- **Redact sensitive content** while preserving detection context
- Implement **secure deletion** for expired quarantine entries
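A sketch of how one quarantine record can satisfy these points, persisting only a redacted snippet plus a cryptographic digest (helper names illustrative; the fields match the `quarantine` schema above):
```python
# audit_fields.py -- illustrative redact-and-hash helpers for quarantine records
import hashlib

def redact(secret: str) -> str:
    """Keep first/last 4 chars for context; mask everything else."""
    if len(secret) <= 8:
        return "[REDACTED]"  # too short to safely show any characters
    return secret[:4] + "****" + secret[-4:]

def audit_fields(original_content: str, secret: str) -> dict:
    return {
        # Stored: enough context for review, never the raw secret
        "redacted_content": original_content.replace(secret, f"[REDACTED:{redact(secret)}]"),
        # Stored: one-way SHA-256 digest so the exact entry can be matched in audits
        "full_content_hash": hashlib.sha256(original_content.encode()).hexdigest(),
    }
```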
### 2. Access Control
- **Role-based access** to dashboard (security admin, reviewer, read-only)
- **Audit logging** for all administrative actions
- **OAuth integration** with existing identity provider
- **API key authentication** for automated systems
### 3. Network Security
- **TLS encryption** for all API communication
- **VPN/private network** access to monitoring systems
- **Rate limiting** to prevent API abuse
- **IP allowlisting** for critical endpoints
## Deployment Architecture
### Development Environment
```yaml
# docker-compose.dev.yml
services:
shhh-monitor:
build: .
environment:
- DATABASE_URL=postgresql://dev:dev@postgres:5432/shhh_dev
- HYPERCORE_LOG_PATH=/data/hypercore.log
- BZZZ_CONFIG_PATH=/config/bzzz.yaml
volumes:
- ./data:/data
- ./config:/config
postgres:
image: postgres:15
environment:
POSTGRES_DB: shhh_dev
POSTGRES_USER: dev
POSTGRES_PASSWORD: dev
redis:
image: redis:7-alpine
# For caching and real-time notifications
```
### Production Deployment
```yaml
# docker-compose.prod.yml
services:
shhh-monitor:
image: registry.home.deepblack.cloud/tony/shhh-monitor:latest
deploy:
replicas: 2
placement:
constraints:
- node.role == manager
environment:
- DATABASE_URL=postgresql://shhh:${SHHH_DB_PASSWORD}@postgres:5432/shhh_prod
- HYPERCORE_LOG_PATH=/hypercore/current.log
networks:
- shhh_network
- tengig # For dashboard access
```
## Performance Requirements
### Latency Targets
- **Log Processing**: <50ms per hypercore entry
- **Secret Detection**: <10ms per BZZZ message
- **Alert Generation**: <100ms for critical secrets
- **Dashboard Response**: <200ms for UI queries
### Throughput Targets
- **Message Scanning**: 1000 messages/second
- **Concurrent Users**: 10+ dashboard users
- **Alert Volume**: 100+ alerts/hour during peak
- **Database Queries**: <5ms average response time
## Monitoring & Observability
### Metrics Collection
```python
# Prometheus metrics
from prometheus_client import Counter, Histogram, Gauge

messages_scanned_total = Counter('shhh_messages_scanned_total', 'Total hypercore entries scanned')
secrets_detected_total = Counter('shhh_secrets_detected_total', 'Secrets detected', ['secret_type', 'severity'])
detection_latency = Histogram('shhh_detection_latency_seconds', 'Per-entry detection latency')
quarantine_size = Gauge('shhh_quarantine_entries_total', 'Current quarantined entries')
```
### Health Checks
- **Hypercore connectivity**: Verify log file access
- **Database health**: Connection pool status
- **BZZZ integration**: P2P network connectivity
- **Alert system**: Webhook endpoint validation
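A minimal `/health` route aggregating these four checks (a sketch; the individual check bodies are placeholders, and the route matches the SHHH Dockerfile's `HEALTHCHECK`):
```python
# health.py -- illustrative aggregation of the four health checks
from fastapi import FastAPI, Response

app = FastAPI()

def check_hypercore() -> bool:
    return True  # placeholder: verify log file access

def check_database() -> bool:
    return True  # placeholder: connection pool status

def check_bzzz() -> bool:
    return True  # placeholder: P2P network connectivity

def check_webhooks() -> bool:
    return True  # placeholder: alert endpoint validation

@app.get("/health")
def health(response: Response):
    checks = {
        "hypercore": check_hypercore(),
        "database": check_database(),
        "bzzz": check_bzzz(),
        "webhooks": check_webhooks(),
    }
    healthy = all(checks.values())
    if not healthy:
        response.status_code = 503  # fails the Docker HEALTHCHECK curl
    return {"status": "ok" if healthy else "degraded", "checks": checks}
```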
### Logging Strategy
```python
# Structured logging with correlation IDs
{
"timestamp": "2025-08-02T13:45:00Z",
"level": "WARNING",
"event": "secret_detected",
"correlation_id": "req_123",
"secret_type": "AWS_ACCESS_KEY",
"severity": "HIGH",
"hypercore_position": 58321,
"bzzz_message_id": "msg_abc123",
"redacted_content": "AKIA****XYZ found in agent message"
}
```
## Testing Strategy
### Unit Tests
- **Regex pattern validation**: Test against known secret formats
- **Message parsing**: Verify hypercore and BZZZ format handling
- **Quarantine logic**: Test storage and retrieval functions
- **Alert generation**: Mock webhook endpoint testing
### Integration Tests
- **End-to-end workflow**: Log → Detection → Quarantine → Alert
- **Database operations**: PostgreSQL CRUD operations
- **BZZZ integration**: Message interception and blocking
- **API endpoints**: FastAPI route testing
### Security Tests
- **Input validation**: SQL injection, XSS prevention
- **Access control**: Role-based permission testing
- **Data protection**: Verify secret redaction and hashing
- **Performance**: Load testing with high message volume
## Rollout Plan
### Phase 1: Foundation (Weeks 1-2)
- Core monitoring system with hypercore integration
- Basic secret detection using SHHH patterns
- PostgreSQL quarantine storage
- Simple alerting via webhooks
### Phase 2: Integration (Weeks 3-4)
- BZZZ network message interception
- Real-time processing pipeline
- Enhanced pattern management
- Performance optimization
### Phase 3: Dashboard (Weeks 5-6)
- FastAPI backend with full CRUD operations
- React dashboard for quarantine management
- Pattern editor and approval workflow
- Security metrics and reporting
### Phase 4: Automation (Weeks 7-8)
- Automated secret revocation APIs
- Meta-learning system for pattern improvement
- Production deployment and monitoring
- Documentation and team training
## Success Criteria
### Security Effectiveness
- **Zero secret leaks** in BZZZ P2P network after deployment
- **<1% false positive rate** for secret detection
- **<30 seconds** average time to detect and quarantine secrets
- **99.9% uptime** for monitoring system
### Operational Excellence
- **Complete audit trail** for all security events
- **Self-improving** pattern detection through feedback
- **Scalable architecture** supporting growth in CHORUS usage
- **Team adoption** with trained security administrators
## Risk Mitigation
### Technical Risks
- **Performance impact**: Monitor hypercore processing overhead
- **False positives**: Implement feedback loop for pattern refinement
- **BZZZ integration**: Maintain compatibility with P2P protocol evolution
- **Data loss**: Backup quarantine database and implement recovery procedures
### Security Risks
- **Bypassing detection**: Regular pattern updates and meta-learning
- **System compromise**: Network isolation and access controls
- **Secret exposure**: Implement proper redaction and audit procedures
- **Alert fatigue**: Tune detection thresholds to minimize noise
## Conclusion
This SHHH Hypercore Log Monitor provides comprehensive protection against secret leakage in the CHORUS Services BZZZ P2P network. By implementing real-time detection, automated response, and continuous learning, we ensure that sensitive information remains secure while maintaining the performance and functionality of the distributed AI orchestration platform.
The system builds upon the existing SHHH framework while adding the specific hypercore and BZZZ integrations needed for CHORUS Services. The phased rollout ensures stability and allows for iterative improvement based on real-world usage patterns.


@@ -0,0 +1,251 @@
# 🛡️ CHORUS Services Secrets Sentinel Agent - System Prompt
## Agent Role & Mission
You are the **Secrets Sentinel**, a specialized security agent responsible for monitoring the CHORUS Services hypercore log and BZZZ P2P network messages to detect, quarantine, and prevent the leakage of sensitive credentials and secrets.
## Core Responsibilities
### 🔍 **Detection & Analysis**
- **Real-time Log Monitoring**: Continuously scan hypercore log entries for secret patterns
- **BZZZ Message Inspection**: Analyze P2P messages before they propagate across the network
- **Pattern Recognition**: Apply sophisticated regex patterns to identify various secret types
- **Context Analysis**: Understand the context around detected patterns to minimize false positives
### 🚨 **Immediate Response Actions**
- **Redaction**: Immediately redact detected secrets while preserving log context
- **Quarantine**: Isolate HIGH severity log entries from normal processing
- **Network Blocking**: Prevent BZZZ messages containing secrets from propagating
- **Alert Generation**: Send immediate notifications to security team
### 🔄 **Automated Remediation**
- **Revocation Triggers**: Automatically trigger webhook-based secret revocation
- **API Integration**: Interface with AWS, GitHub, Slack APIs for immediate credential deactivation
- **Audit Trail**: Maintain complete records of all detection and remediation actions
### 🧠 **Adaptive Learning**
- **Pattern Evolution**: Update detection rules based on feedback and new secret types
- **False Positive Reduction**: Refine patterns based on curator feedback
- **Confidence Scoring**: Assign confidence levels to detections for proper escalation
## Detection Patterns & Rules
### **High Severity Secrets (Immediate Quarantine + Revocation)**
```yaml
AWS_ACCESS_KEY:
regex: "AKIA[0-9A-Z]{16}"
severity: "CRITICAL"
confidence: 0.95
action: "quarantine_and_revoke"
PRIVATE_KEY:
regex: "-----BEGIN [A-Z ]*PRIVATE KEY-----"
severity: "CRITICAL"
confidence: 0.98
action: "quarantine_and_revoke"
GITHUB_TOKEN:
regex: "ghp_[0-9A-Za-z]{36}"
severity: "HIGH"
confidence: 0.92
action: "quarantine_and_revoke"
```
### **Medium Severity Secrets (Quarantine + Alert)**
```yaml
JWT_TOKEN:
regex: "eyJ[A-Za-z0-9_-]+?\\.[A-Za-z0-9_-]+?\\.[A-Za-z0-9_-]+?"
severity: "MEDIUM"
confidence: 0.85
action: "quarantine_and_alert"
SLACK_TOKEN:
regex: "xox[baprs]-[0-9A-Za-z-]{10,48}"
severity: "HIGH"
confidence: 0.90
action: "quarantine_and_revoke"
```
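
The YAML rules above map directly onto a compiled, confidence-weighted matcher. A minimal sketch follows (the inlined `PATTERNS` dict and the `match_secrets` helper are illustrative, not part of the shipped detector):

```python
import re

# Inlined subset of the YAML rules above.
PATTERNS = {
    "AWS_ACCESS_KEY": {"regex": r"AKIA[0-9A-Z]{16}", "severity": "CRITICAL",
                       "confidence": 0.95, "action": "quarantine_and_revoke"},
    "GITHUB_TOKEN": {"regex": r"ghp_[0-9A-Za-z]{36}", "severity": "HIGH",
                     "confidence": 0.92, "action": "quarantine_and_revoke"},
}

# Pre-compile once; per-entry scanning then only pays the match cost.
COMPILED = {name: (re.compile(p["regex"]), p) for name, p in PATTERNS.items()}

def match_secrets(text: str) -> list[dict]:
    """Return every rule hit with its severity, confidence, and action."""
    hits = []
    for name, (regex, props) in COMPILED.items():
        for m in regex.finditer(text):
            hits.append({"secret_type": name, "value": m.group(0),
                         "severity": props["severity"],
                         "confidence": props["confidence"],
                         "action": props["action"]})
    return hits

print(match_secrets("deploy with AKIAABCDEFGHIJKLMNOP"))
```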
## Behavioral Guidelines
### **Detection Behavior**
1. **Scan Every Log Entry**: Process all hypercore entries in real-time
2. **Parse BZZZ Messages**: Extract and analyze P2P message payloads
3. **Apply Pattern Matching**: Use confidence-weighted regex patterns
4. **Context Preservation**: Maintain enough context for security analysis
### **Response Behavior**
1. **Immediate Action**: For CRITICAL/HIGH severity, act within seconds
2. **Graduated Response**: Different actions based on severity levels
3. **Human Escalation**: Flag uncertain cases for human review
4. **Audit Everything**: Log all actions with timestamps and reasons
### **Learning Behavior**
1. **Accept Feedback**: Process curator reports of false positives/missed secrets
2. **Pattern Refinement**: Propose regex updates based on feedback
3. **Version Control**: Track all pattern changes with confidence scores
4. **Human Approval**: Submit new patterns for security admin approval
## Operational Procedures
### **Log Entry Processing Workflow**
```
1. Receive hypercore log entry
2. Parse entry structure and extract content
3. If BZZZ message → extract P2P payload
4. Apply all active regex patterns
5. Calculate confidence scores
6. Determine severity level
7. Execute appropriate response action
8. Log detection event and actions taken
```
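
As a rough illustration, the eight steps above collapse into a single dispatch function. This is a sketch only — `scan`, `quarantine_log_entry`, and `forward_entry` stand in for the real pipeline components:

```python
def process_entry(entry, scan, quarantine_log_entry, forward_entry):
    """One pass of the workflow above for a single hypercore entry."""
    # Steps 1-3: the entry arrives parsed; for BZZZ messages the caller
    # has already extracted the P2P payload into entry.content.
    matches = scan(entry.content)             # Step 4: apply active patterns
    if not matches:
        forward_entry(entry)                  # Clean entry flows onward
        return None
    best = max(matches, key=lambda m: m["confidence"])   # Steps 5-6
    record = quarantine_log_entry(entry, best["secret_type"],
                                  best["confidence"])    # Step 7
    return record                             # Step 8: caller logs the event
```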
### **Quarantine Procedure**
```python
import hashlib
from datetime import datetime

def quarantine_log_entry(entry, secret_type, confidence):
"""Quarantine sensitive log entry for security review"""
redacted_content = redact_secrets(entry.content)
quarantine_record = {
"timestamp": datetime.utcnow().isoformat() + "Z",
"hypercore_position": entry.position,
"secret_type": secret_type,
"severity": determine_severity(secret_type),
"confidence": confidence,
"redacted_content": redacted_content,
"content_hash": hash(entry.content), # For audit
"source_agent": entry.source_agent,
"reason": f"Secret detected: {secret_type}"
}
store_in_quarantine_db(quarantine_record)
if entry.is_bzzz_message:
block_bzzz_propagation(entry.message_id)
return quarantine_record
```
### **Revocation Trigger Procedure**
```python
def trigger_secret_revocation(secret_type, redacted_sample):
"""Trigger automated secret revocation via webhooks"""
revocation_payload = {
"event": "secret_leak_detected",
"secret_type": secret_type,
"redacted_key": redacted_sample,
"hypercore_position": current_position,
"severity": determine_severity(secret_type),
"recommended_action": get_revocation_action(secret_type),
"timestamp": datetime.utcnow().isoformat() + "Z"
}
webhook_url = REVOCATION_HOOKS.get(secret_type)
if webhook_url:
send_webhook(webhook_url, revocation_payload)
log_revocation_attempt(secret_type, "triggered")
```
## Communication Protocols
### **Alert Format for Security Team**
```json
{
"alert_id": "shhh_12345",
"timestamp": "2025-08-02T13:45:00Z",
"severity": "HIGH",
"secret_type": "AWS_ACCESS_KEY",
"source": "hypercore_position_58321",
"agent_source": "whoosh_orchestrator",
"redacted_content": "Found AWS key AKIA****XYZ in deployment config",
"confidence": 0.95,
"actions_taken": ["quarantined", "revocation_triggered"],
"next_steps": "Manual verification recommended"
}
```
### **Feedback Processing Format**
```json
{
"feedback_type": "false_positive|missed_secret|pattern_improvement",
"alert_id": "shhh_12345",
"secret_type": "AWS_ACCESS_KEY",
"evidence": "Key was test data: AKIA-TESTKEY-123",
"suggested_regex_fix": "AKIA[0-9A-Z]{16}(?!-TESTKEY)",
"reviewer": "security_admin",
"timestamp": "2025-08-02T14:00:00Z"
}
```
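
A false-positive report in this format can be applied mechanically before any human-approved regex change ships: lower the pattern's confidence and stage the suggested fix for review. A minimal sketch (the in-memory `patterns` dict and the 0.05 decay step are illustrative choices, not part of the spec):

```python
def apply_feedback(patterns: dict, feedback: dict, decay: float = 0.05) -> dict:
    """Adjust pattern confidence from a curator feedback record."""
    entry = patterns[feedback["secret_type"]]
    if feedback["feedback_type"] == "false_positive":
        # Lower confidence, but never below the 0.80 activation floor
        # noted under Performance Requirements below.
        entry["confidence"] = max(0.80, entry["confidence"] - decay)
    if fix := feedback.get("suggested_regex_fix"):
        # Stage, don't activate: new regexes require security-admin approval.
        entry["proposed_regex"] = fix
    return patterns

patterns = {"AWS_ACCESS_KEY": {"regex": "AKIA[0-9A-Z]{16}", "confidence": 0.95}}
fb = {"feedback_type": "false_positive", "secret_type": "AWS_ACCESS_KEY",
      "suggested_regex_fix": "AKIA[0-9A-Z]{16}(?!-TESTKEY)"}
print(apply_feedback(patterns, fb)["AWS_ACCESS_KEY"])
```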
## Performance Requirements
### **Response Time Targets**
- **Detection Latency**: <50ms per log entry
- **Quarantine Action**: <100ms for high severity
- **Revocation Trigger**: <200ms for webhook dispatch
- **BZZZ Block**: <10ms to prevent propagation
### **Accuracy Standards**
- **False Positive Rate**: <2% for high confidence patterns
- **Detection Coverage**: >99% for known secret formats
- **Pattern Confidence**: Minimum 0.80 for active patterns
## Error Handling & Recovery
### **System Failures**
- **Database Connectivity**: Queue quarantine entries locally, sync when recovered
- **Webhook Failures**: Retry with exponential backoff, alert on continued failure (both recovery paths sketched after this list)
- **Pattern Loading**: Fall back to core patterns if external config unavailable
- **Log Processing**: Never skip entries, flag for manual review if uncertain
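
The first two recovery rules combine into a small buffered-retry helper, sketched here under the assumption of an async `send` callable and an in-memory deque as the local queue (a real deployment would persist it to disk):

```python
import asyncio
from collections import deque

pending: deque = deque()  # Local queue used while the backend is down

async def deliver_with_backoff(send, payload, retries: int = 3, base: float = 2.0):
    """Try to deliver; on repeated failure, park the payload locally."""
    for attempt in range(retries):
        try:
            await send(payload)
            return True
        except Exception:
            await asyncio.sleep(base * (2 ** attempt))  # 2s, 4s, 8s
    pending.append(payload)  # Sync back once connectivity recovers
    return False
```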
### **Security Incident Response**
- **Potential Breach**: Immediately escalate to security team
- **Pattern Bypass**: Alert security team, request pattern review
- **False Negative**: Update patterns, retroactively scan recent logs
- **System Compromise**: Quarantine all recent activity, manual investigation
## Integration Points
### **CHORUS Services Components**
- **Hypercore Log**: Primary data source for monitoring
- **BZZZ Network**: P2P message inspection and blocking capability
- **WHOOSH Orchestrator**: Agent activity monitoring
- **SLURP Context**: Context-aware secret detection
- **Security Dashboard**: Real-time alert display and management
### **External Systems**
- **AWS IAM**: Automated access key revocation
- **GitHub API**: Personal access token deactivation
- **Slack Admin API**: Bot/user token revocation
- **Security SIEM**: Alert forwarding and correlation
- **Audit System**: Compliance logging and reporting
## Continuous Improvement
### **Pattern Learning Process**
1. **Feedback Collection**: Gather curator reports on detection accuracy
2. **Pattern Analysis**: Identify common false positive/negative patterns
3. **Regex Generation**: Create new patterns using AI-assisted regex generation
4. **Confidence Assessment**: Test new patterns against historical data
5. **Human Review**: Submit high-confidence patterns for security admin approval
6. **Production Deployment**: Activate approved patterns with monitoring
### **Meta-Learning Capabilities**
- **Trend Analysis**: Identify emerging secret types and formats
- **Context Learning**: Improve understanding of legitimate vs. malicious patterns
- **Agent Behavior**: Learn which agents commonly handle sensitive data
- **Temporal Patterns**: Understand when secret leaks are most likely to occur
## Success Metrics
### **Security Effectiveness**
- **Zero secret propagation** in BZZZ P2P network post-deployment
- **Mean time to detection**: <1 minute for any secret exposure
- **Revocation success rate**: >95% for automated responses
- **Coverage improvement**: Continuous expansion of detectable secret types
### **Operational Excellence**
- **System uptime**: >99.9% availability for log monitoring
- **Processing throughput**: Handle 10,000+ log entries per minute
- **Alert quality**: <5% false positive rate for security team alerts
- **Response automation**: >90% of secrets handled without human intervention
You are now equipped to serve as the CHORUS Services Secrets Sentinel. Monitor vigilantly, respond swiftly, and continuously evolve your detection capabilities to protect our distributed AI orchestration platform from credential exposure and security breaches.
Remember: **Security is paramount. When in doubt, quarantine and escalate.**

View File

@@ -0,0 +1,4 @@
# SHHH API Module
"""
FastAPI dashboard and API endpoints for SHHH Secrets Sentinel.
"""

374
modules/shhh/api/main.py Normal file
View File

@@ -0,0 +1,374 @@
"""
FastAPI Dashboard Backend for SHHH Secrets Sentinel
Provides REST API endpoints for quarantine management and system monitoring.
"""
import asyncio
from datetime import datetime
from typing import List, Optional
from contextlib import asynccontextmanager
from fastapi import FastAPI, HTTPException, Depends, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
import structlog
from .models import (
QuarantineEntryResponse, QuarantineReviewRequest, RevocationEventResponse,
PatternResponse, PatternUpdateRequest, StatsResponse, SystemHealthResponse,
ProcessingStatsResponse, AlertRequest, WebhookTestRequest, WebhookTestResponse,
PatternTestRequest, PatternTestResponse, SearchRequest, PaginatedResponse
)
from ..core.quarantine import QuarantineManager, QuarantineEntry
from ..core.detector import SecretDetector
from ..automation.revocation import SecretRevoker
from ..pipeline.processor import MessageProcessor
logger = structlog.get_logger()
# Global components (initialized in lifespan)
quarantine_manager: Optional[QuarantineManager] = None
detector: Optional[SecretDetector] = None
revoker: Optional[SecretRevoker] = None
processor: Optional[MessageProcessor] = None
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Application lifespan manager"""
global quarantine_manager, detector, revoker, processor
try:
# Initialize components
logger.info("Initializing SHHH API components")
# These would normally come from configuration
config = {
'database_url': 'postgresql://shhh:password@localhost:5432/shhh_sentinel',
'patterns_file': 'patterns.yaml',
'revocation_webhooks': {
'AWS_ACCESS_KEY': 'https://security.chorus.services/hooks/aws-revoke',
'GITHUB_TOKEN': 'https://security.chorus.services/hooks/github-revoke',
'SLACK_TOKEN': 'https://security.chorus.services/hooks/slack-revoke'
}
}
# Initialize quarantine manager
quarantine_manager = QuarantineManager(config['database_url'])
await quarantine_manager.initialize()
# Initialize detector
detector = SecretDetector(config['patterns_file'])
# Initialize revoker
revoker = SecretRevoker(quarantine_manager, config['revocation_webhooks'])
logger.info("SHHH API components initialized successfully")
yield
except Exception as e:
logger.error(f"Failed to initialize SHHH API: {e}")
raise
finally:
# Cleanup
if quarantine_manager:
await quarantine_manager.close()
logger.info("SHHH API components shut down")
app = FastAPI(
title="SHHH Secrets Sentinel API",
description="REST API for managing secrets detection, quarantine, and response",
version="1.0.0",
lifespan=lifespan
)
# CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=["*"], # Configure appropriately for production
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Dependency functions
async def get_quarantine_manager() -> QuarantineManager:
if not quarantine_manager:
raise HTTPException(status_code=503, detail="Quarantine manager not available")
return quarantine_manager
async def get_detector() -> SecretDetector:
if not detector:
raise HTTPException(status_code=503, detail="Secret detector not available")
return detector
async def get_revoker() -> SecretRevoker:
if not revoker:
raise HTTPException(status_code=503, detail="Secret revoker not available")
return revoker
# Health and status endpoints
@app.get("/health", response_model=SystemHealthResponse)
async def get_health():
"""Get system health status"""
health = {
'status': 'healthy',
'timestamp': datetime.now(),
'components': {
'quarantine_manager': {
'initialized': quarantine_manager is not None,
'database_connected': quarantine_manager.pool is not None if quarantine_manager else False
},
'detector': {
'initialized': detector is not None,
'patterns_loaded': len(detector.patterns) if detector else 0
},
'revoker': {
'initialized': revoker is not None,
'webhooks_configured': len(revoker.webhook_config) if revoker else 0
}
}
}
return health
@app.get("/stats", response_model=StatsResponse)
async def get_stats(qm: QuarantineManager = Depends(get_quarantine_manager)):
"""Get quarantine statistics"""
stats = await qm.get_quarantine_stats()
return stats
# Quarantine management endpoints
@app.get("/quarantine", response_model=List[QuarantineEntryResponse])
async def get_quarantine_entries(
limit: int = 100,
offset: int = 0,
severity: Optional[str] = None,
reviewed: Optional[bool] = None,
qm: QuarantineManager = Depends(get_quarantine_manager)
):
"""Get quarantine entries with optional filters"""
entries = await qm.get_quarantine_entries(
limit=limit,
offset=offset,
severity_filter=severity,
reviewed_filter=reviewed
)
return [QuarantineEntryResponse(**entry.__dict__) for entry in entries]
@app.post("/quarantine/search", response_model=PaginatedResponse)
async def search_quarantine_entries(
search: SearchRequest,
qm: QuarantineManager = Depends(get_quarantine_manager)
):
"""Search quarantine entries with advanced filters"""
# This would implement more complex search logic
entries = await qm.get_quarantine_entries(
limit=search.limit,
offset=search.offset,
severity_filter=search.severity,
reviewed_filter=search.reviewed
)
items = [QuarantineEntryResponse(**entry.__dict__) for entry in entries]
return PaginatedResponse(
items=items,
total=len(items), # This would be the actual total from a count query
limit=search.limit,
offset=search.offset,
has_more=len(items) == search.limit
)
@app.post("/quarantine/{entry_id}/review")
async def review_quarantine_entry(
entry_id: int,
review: QuarantineReviewRequest,
qm: QuarantineManager = Depends(get_quarantine_manager)
):
"""Mark a quarantine entry as reviewed"""
success = await qm.mark_reviewed(entry_id, review.action, review.reviewer)
if not success:
raise HTTPException(status_code=404, detail="Quarantine entry not found")
return {"status": "success", "message": f"Entry {entry_id} marked as {review.action}"}
@app.get("/quarantine/{entry_id}")
async def get_quarantine_entry(
entry_id: int,
qm: QuarantineManager = Depends(get_quarantine_manager)
):
"""Get a specific quarantine entry by ID"""
# This would need to be implemented in QuarantineManager
raise HTTPException(status_code=501, detail="Not implemented yet")
# Pattern management endpoints
@app.get("/patterns", response_model=List[PatternResponse])
async def get_patterns(detector: SecretDetector = Depends(get_detector)):
"""Get all detection patterns"""
patterns = []
for name, config in detector.patterns.items():
patterns.append(PatternResponse(
name=name,
regex=config['regex'],
description=config.get('description', ''),
severity=config.get('severity', 'MEDIUM'),
confidence=config.get('confidence', 0.8),
active=config.get('active', True)
))
return patterns
@app.post("/patterns/{pattern_name}")
async def update_pattern(
pattern_name: str,
pattern: PatternUpdateRequest,
detector: SecretDetector = Depends(get_detector)
):
"""Update or create a detection pattern"""
    # Validate the regex before touching detector state so an invalid
    # pattern cannot clobber a working one.
    import re
    try:
        compiled = re.compile(pattern.regex, re.MULTILINE | re.DOTALL)
    except re.error as e:
        raise HTTPException(status_code=400, detail=f"Invalid regex: {e}")
    # This would update the patterns.yaml file; for now, update in memory only.
    detector.patterns[pattern_name] = {
        'regex': pattern.regex,
        'description': pattern.description,
        'severity': pattern.severity,
        'confidence': pattern.confidence,
        'active': pattern.active,
        'compiled_regex': compiled
    }
    return {"status": "success", "message": f"Pattern {pattern_name} updated"}
@app.post("/patterns/{pattern_name}/test", response_model=PatternTestResponse)
async def test_pattern(
pattern_name: str,
test_request: PatternTestRequest,
detector: SecretDetector = Depends(get_detector)
):
"""Test a detection pattern against sample text"""
try:
matches = detector.test_pattern(pattern_name, test_request.test_text)
return PatternTestResponse(
matches=[match.__dict__ for match in matches],
match_count=len(matches)
)
except ValueError as e:
raise HTTPException(status_code=404, detail=str(e))
@app.get("/patterns/stats")
async def get_pattern_stats(detector: SecretDetector = Depends(get_detector)):
"""Get pattern statistics"""
return detector.get_pattern_stats()
# Revocation management endpoints
@app.get("/revocations", response_model=List[RevocationEventResponse])
async def get_revocations(
limit: int = 100,
offset: int = 0,
qm: QuarantineManager = Depends(get_quarantine_manager)
):
"""Get revocation events"""
# This would need to be implemented in QuarantineManager
raise HTTPException(status_code=501, detail="Not implemented yet")
@app.post("/revocations/test", response_model=WebhookTestResponse)
async def test_webhook(
test_request: WebhookTestRequest,
revoker: SecretRevoker = Depends(get_revoker)
):
"""Test a webhook endpoint"""
result = await revoker.test_webhook_endpoint(test_request.secret_type)
return WebhookTestResponse(**result)
@app.get("/revocations/stats")
async def get_revocation_stats(revoker: SecretRevoker = Depends(get_revoker)):
"""Get revocation statistics"""
return revoker.get_stats()
# Administrative endpoints
@app.post("/admin/cleanup")
async def cleanup_old_entries(
qm: QuarantineManager = Depends(get_quarantine_manager)
):
"""Clean up old quarantine entries"""
deleted_count = await qm.cleanup_old_entries()
return {"status": "success", "deleted_entries": deleted_count}
@app.post("/admin/reload-patterns")
async def reload_patterns(detector: SecretDetector = Depends(get_detector)):
"""Reload detection patterns from file"""
detector.load_patterns()
return {"status": "success", "message": "Patterns reloaded"}
@app.post("/admin/reset-stats")
async def reset_stats(revoker: SecretRevoker = Depends(get_revoker)):
"""Reset revocation statistics"""
revoker.reset_stats()
return {"status": "success", "message": "Statistics reset"}
# Monitoring endpoints
@app.get("/metrics/prometheus")
async def get_prometheus_metrics():
"""Get metrics in Prometheus format"""
# This would generate Prometheus-formatted metrics
raise HTTPException(status_code=501, detail="Prometheus metrics not implemented yet")
@app.get("/logs/recent")
async def get_recent_logs(limit: int = 100):
"""Get recent system logs"""
# This would return recent log entries
raise HTTPException(status_code=501, detail="Log endpoint not implemented yet")
# Error handlers
@app.exception_handler(Exception)
async def general_exception_handler(request, exc):
logger.error(f"Unhandled exception: {exc}")
return JSONResponse(
status_code=500,
content={"detail": "Internal server error"}
)
if __name__ == "__main__":
import uvicorn
uvicorn.run(
"api.main:app",
host="127.0.0.1",
port=8000,
reload=True,
log_level="info"
)

149
modules/shhh/api/models.py Normal file
View File

@@ -0,0 +1,149 @@
"""
Pydantic models for SHHH API endpoints.
"""
from datetime import datetime
from typing import List, Dict, Any, Optional
from pydantic import BaseModel, Field
class QuarantineEntryResponse(BaseModel):
"""Response model for quarantine entries"""
id: int
timestamp: datetime
hypercore_position: int
bzzz_message_id: Optional[str] = None
secret_type: str
severity: str
confidence: float
redacted_content: str
content_hash: str
source_agent: str
match_count: int
reviewed: bool
review_action: Optional[str] = None
reviewer: Optional[str] = None
review_timestamp: Optional[datetime] = None
metadata: Dict[str, Any] = {}
class QuarantineReviewRequest(BaseModel):
"""Request model for reviewing quarantine entries"""
action: str = Field(..., description="Review action: 'false_positive', 'confirmed', 'uncertain'")
reviewer: str = Field(..., description="Name or ID of the reviewer")
notes: Optional[str] = Field(None, description="Optional review notes")
class RevocationEventResponse(BaseModel):
"""Response model for revocation events"""
id: int
quarantine_id: int
secret_type: str
revocation_method: str
status: str
response_data: Dict[str, Any] = {}
timestamp: datetime
class PatternResponse(BaseModel):
"""Response model for detection patterns"""
name: str
regex: str
description: str
severity: str
confidence: float
active: bool
class PatternUpdateRequest(BaseModel):
"""Request model for updating patterns"""
regex: str = Field(..., description="Regular expression pattern")
description: Optional[str] = Field(None, description="Pattern description")
severity: str = Field(..., description="Severity level: LOW, MEDIUM, HIGH, CRITICAL")
confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score")
active: bool = Field(True, description="Whether pattern is active")
class StatsResponse(BaseModel):
"""Response model for system statistics"""
total_entries: int
pending_review: int
critical_count: int
high_count: int
medium_count: int
low_count: int
last_24h: int
last_7d: int
class SystemHealthResponse(BaseModel):
"""Response model for system health"""
status: str
timestamp: datetime
components: Dict[str, Dict[str, Any]]
class ProcessingStatsResponse(BaseModel):
"""Response model for processing statistics"""
entries_processed: int
secrets_detected: int
messages_quarantined: int
revocations_triggered: int
processing_errors: int
uptime_hours: Optional[float] = None
entries_per_second: Optional[float] = None
secrets_per_hour: Optional[float] = None
is_running: bool
class AlertRequest(BaseModel):
"""Request model for manual alerts"""
message: str = Field(..., description="Alert message")
severity: str = Field(..., description="Alert severity")
source: str = Field(..., description="Alert source")
class WebhookTestRequest(BaseModel):
"""Request model for testing webhook endpoints"""
secret_type: str = Field(..., description="Secret type to test")
class WebhookTestResponse(BaseModel):
"""Response model for webhook tests"""
success: bool
method: Optional[str] = None
response_data: Dict[str, Any] = {}
error: Optional[str] = None
class PatternTestRequest(BaseModel):
"""Request model for testing detection patterns"""
pattern_name: str = Field(..., description="Name of pattern to test")
test_text: str = Field(..., description="Text to test against pattern")
class PatternTestResponse(BaseModel):
"""Response model for pattern testing"""
matches: List[Dict[str, Any]]
match_count: int
class SearchRequest(BaseModel):
"""Request model for searching quarantine entries"""
query: Optional[str] = Field(None, description="Search query")
secret_type: Optional[str] = Field(None, description="Filter by secret type")
severity: Optional[str] = Field(None, description="Filter by severity")
reviewed: Optional[bool] = Field(None, description="Filter by review status")
start_date: Optional[datetime] = Field(None, description="Start date filter")
end_date: Optional[datetime] = Field(None, description="End date filter")
limit: int = Field(100, ge=1, le=1000, description="Result limit")
offset: int = Field(0, ge=0, description="Result offset")
class PaginatedResponse(BaseModel):
"""Generic paginated response model"""
items: List[Any]
total: int
limit: int
offset: int
has_more: bool

View File

@@ -0,0 +1,4 @@
# SHHH Automation Module
"""
Automated response and revocation systems for secret detection.
"""

View File

@@ -0,0 +1,474 @@
"""
Automated Secret Revocation System for SHHH Secrets Sentinel
Provides automated response capabilities for different types of detected secrets.
"""
import asyncio
import aiohttp
import json
import time
from typing import Dict, Any, Optional, List
from dataclasses import dataclass
from datetime import datetime
import structlog
from ..core.quarantine import QuarantineEntry, RevocationEvent, QuarantineManager
from ..core.detector import SecretMatch
logger = structlog.get_logger()
@dataclass
class RevocationRequest:
"""Represents a request to revoke a secret"""
quarantine_id: int
secret_type: str
redacted_secret: str
urgency: str # 'immediate', 'high', 'medium', 'low'
metadata: Dict[str, Any]
@dataclass
class RevocationResponse:
"""Represents the response from a revocation attempt"""
success: bool
method: str
response_data: Dict[str, Any]
error_message: Optional[str] = None
revocation_id: Optional[str] = None
class SecretRevoker:
"""
Automated secret revocation system that integrates with various cloud providers
and services to automatically disable compromised credentials.
"""
def __init__(self, quarantine_manager: QuarantineManager, webhook_config: Dict[str, str] = None):
self.quarantine = quarantine_manager
self.webhook_config = webhook_config or {}
# Revocation timeouts and retry settings
self.request_timeout = 10 # seconds
self.max_retries = 3
self.retry_delay = 2 # seconds
# Statistics
self.stats = {
'total_revocations': 0,
'successful_revocations': 0,
'failed_revocations': 0,
'revocations_by_type': {},
'last_reset': datetime.now()
}
logger.info("Initialized SecretRevoker")
async def trigger_revocation(self, quarantine_entry: QuarantineEntry) -> Optional[RevocationResponse]:
"""Trigger automatic revocation for a quarantined secret"""
try:
revocation_request = RevocationRequest(
quarantine_id=quarantine_entry.id,
secret_type=quarantine_entry.secret_type,
redacted_secret=self._extract_redacted_from_metadata(quarantine_entry),
urgency=self._determine_urgency(quarantine_entry.severity),
metadata={
'source_agent': quarantine_entry.source_agent,
'detection_timestamp': quarantine_entry.timestamp.isoformat(),
'confidence': quarantine_entry.confidence,
'hypercore_position': quarantine_entry.hypercore_position
}
)
# Determine revocation method
revocation_method = self._get_revocation_method(quarantine_entry.secret_type)
if not revocation_method:
logger.warning(f"No revocation method configured for {quarantine_entry.secret_type}")
return None
# Attempt revocation
response = await self._execute_revocation(revocation_request, revocation_method)
# Record the revocation event
await self._record_revocation_event(quarantine_entry, response)
# Update statistics
self._update_stats(quarantine_entry.secret_type, response.success)
return response
except Exception as e:
logger.error(f"Failed to trigger revocation for quarantine {quarantine_entry.id}: {e}")
return None
def _extract_redacted_from_metadata(self, quarantine_entry: QuarantineEntry) -> str:
"""Extract redacted secret from quarantine metadata"""
try:
matches = quarantine_entry.metadata.get('matches', [])
if matches:
# Get the first match's redacted text
return matches[0].get('redacted_text', 'REDACTED')
        except (AttributeError, IndexError, TypeError):
            pass
return 'REDACTED'
def _determine_urgency(self, severity: str) -> str:
"""Determine revocation urgency based on severity"""
urgency_map = {
'CRITICAL': 'immediate',
'HIGH': 'high',
'MEDIUM': 'medium',
'LOW': 'low'
}
return urgency_map.get(severity, 'medium')
def _get_revocation_method(self, secret_type: str) -> Optional[str]:
"""Get the appropriate revocation method for a secret type"""
method_map = {
'AWS_ACCESS_KEY': 'aws_iam_revocation',
'AWS_SECRET_KEY': 'aws_iam_revocation',
'GITHUB_TOKEN': 'github_token_revocation',
'GITHUB_OAUTH': 'github_token_revocation',
'SLACK_TOKEN': 'slack_token_revocation',
'GOOGLE_API_KEY': 'google_api_revocation',
'DOCKER_TOKEN': 'docker_token_revocation'
}
return method_map.get(secret_type)
async def _execute_revocation(self, request: RevocationRequest, method: str) -> RevocationResponse:
"""Execute the actual revocation based on the method"""
method_handlers = {
'aws_iam_revocation': self._revoke_aws_credentials,
'github_token_revocation': self._revoke_github_token,
'slack_token_revocation': self._revoke_slack_token,
'google_api_revocation': self._revoke_google_api_key,
'docker_token_revocation': self._revoke_docker_token,
'webhook_revocation': self._revoke_via_webhook
}
handler = method_handlers.get(method, self._revoke_via_webhook)
for attempt in range(self.max_retries):
try:
response = await handler(request)
if response.success:
logger.info(
f"Successfully revoked {request.secret_type}",
quarantine_id=request.quarantine_id,
method=method,
attempt=attempt + 1
)
return response
# Log failure and retry if not successful
logger.warning(
f"Revocation attempt {attempt + 1} failed",
quarantine_id=request.quarantine_id,
method=method,
error=response.error_message
)
if attempt < self.max_retries - 1:
                    await asyncio.sleep(self.retry_delay * (2 ** attempt))  # Exponential backoff
except Exception as e:
logger.error(f"Revocation attempt {attempt + 1} error: {e}")
if attempt < self.max_retries - 1:
                    await asyncio.sleep(self.retry_delay * (2 ** attempt))
# All attempts failed
return RevocationResponse(
success=False,
method=method,
response_data={},
error_message=f"All {self.max_retries} revocation attempts failed"
)
async def _revoke_aws_credentials(self, request: RevocationRequest) -> RevocationResponse:
"""Revoke AWS credentials via webhook"""
webhook_url = self.webhook_config.get('AWS_ACCESS_KEY')
if not webhook_url:
return RevocationResponse(
success=False,
method='aws_iam_revocation',
response_data={},
error_message="No AWS revocation webhook configured"
)
payload = {
'event': 'secret_leak_detected',
'secret_type': request.secret_type,
'redacted_key': request.redacted_secret,
'urgency': request.urgency,
'quarantine_id': request.quarantine_id,
'timestamp': datetime.now().isoformat(),
'recommended_action': 'Revoke IAM access key immediately',
'metadata': request.metadata
}
return await self._send_webhook_request(webhook_url, payload, 'aws_iam_revocation')
async def _revoke_github_token(self, request: RevocationRequest) -> RevocationResponse:
"""Revoke GitHub token via webhook"""
webhook_url = self.webhook_config.get('GITHUB_TOKEN')
if not webhook_url:
return RevocationResponse(
success=False,
method='github_token_revocation',
response_data={},
error_message="No GitHub revocation webhook configured"
)
payload = {
'event': 'secret_leak_detected',
'secret_type': request.secret_type,
'redacted_key': request.redacted_secret,
'urgency': request.urgency,
'quarantine_id': request.quarantine_id,
'timestamp': datetime.now().isoformat(),
'recommended_action': 'Revoke GitHub token via API or settings',
'metadata': request.metadata
}
return await self._send_webhook_request(webhook_url, payload, 'github_token_revocation')
async def _revoke_slack_token(self, request: RevocationRequest) -> RevocationResponse:
"""Revoke Slack token via webhook"""
webhook_url = self.webhook_config.get('SLACK_TOKEN')
if not webhook_url:
return RevocationResponse(
success=False,
method='slack_token_revocation',
response_data={},
error_message="No Slack revocation webhook configured"
)
payload = {
'event': 'secret_leak_detected',
'secret_type': request.secret_type,
'redacted_key': request.redacted_secret,
'urgency': request.urgency,
'quarantine_id': request.quarantine_id,
'timestamp': datetime.now().isoformat(),
'recommended_action': 'Revoke Slack token via Admin API',
'metadata': request.metadata
}
return await self._send_webhook_request(webhook_url, payload, 'slack_token_revocation')
async def _revoke_google_api_key(self, request: RevocationRequest) -> RevocationResponse:
"""Revoke Google API key via webhook"""
webhook_url = self.webhook_config.get('GOOGLE_API_KEY')
if not webhook_url:
return RevocationResponse(
success=False,
method='google_api_revocation',
response_data={},
error_message="No Google API revocation webhook configured"
)
payload = {
'event': 'secret_leak_detected',
'secret_type': request.secret_type,
'redacted_key': request.redacted_secret,
'urgency': request.urgency,
'quarantine_id': request.quarantine_id,
'timestamp': datetime.now().isoformat(),
'recommended_action': 'Revoke API key via Google Cloud Console',
'metadata': request.metadata
}
return await self._send_webhook_request(webhook_url, payload, 'google_api_revocation')
async def _revoke_docker_token(self, request: RevocationRequest) -> RevocationResponse:
"""Revoke Docker token via webhook"""
webhook_url = self.webhook_config.get('DOCKER_TOKEN')
if not webhook_url:
return RevocationResponse(
success=False,
method='docker_token_revocation',
response_data={},
error_message="No Docker revocation webhook configured"
)
payload = {
'event': 'secret_leak_detected',
'secret_type': request.secret_type,
'redacted_key': request.redacted_secret,
'urgency': request.urgency,
'quarantine_id': request.quarantine_id,
'timestamp': datetime.now().isoformat(),
'recommended_action': 'Revoke Docker token via Hub settings',
'metadata': request.metadata
}
return await self._send_webhook_request(webhook_url, payload, 'docker_token_revocation')
async def _revoke_via_webhook(self, request: RevocationRequest) -> RevocationResponse:
"""Generic webhook revocation for unknown secret types"""
# Try to find a generic webhook endpoint
webhook_url = self.webhook_config.get('GENERIC',
self.webhook_config.get('DEFAULT'))
if not webhook_url:
return RevocationResponse(
success=False,
method='webhook_revocation',
response_data={},
error_message=f"No webhook configured for {request.secret_type}"
)
payload = {
'event': 'secret_leak_detected',
'secret_type': request.secret_type,
'redacted_key': request.redacted_secret,
'urgency': request.urgency,
'quarantine_id': request.quarantine_id,
'timestamp': datetime.now().isoformat(),
'recommended_action': 'Manual review and revocation required',
'metadata': request.metadata
}
return await self._send_webhook_request(webhook_url, payload, 'webhook_revocation')
async def _send_webhook_request(self, url: str, payload: Dict[str, Any], method: str) -> RevocationResponse:
"""Send webhook request and handle response"""
try:
async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=self.request_timeout)) as session:
async with session.post(url, json=payload) as response:
response_data = {}
try:
response_data = await response.json()
                    except Exception:
response_data = {'text': await response.text()}
if response.status == 200:
return RevocationResponse(
success=True,
method=method,
response_data=response_data,
revocation_id=response_data.get('revocation_id')
)
else:
return RevocationResponse(
success=False,
method=method,
response_data=response_data,
error_message=f"HTTP {response.status}: {response_data}"
)
except asyncio.TimeoutError:
return RevocationResponse(
success=False,
method=method,
response_data={},
error_message=f"Webhook request timed out after {self.request_timeout}s"
)
except Exception as e:
return RevocationResponse(
success=False,
method=method,
response_data={},
error_message=f"Webhook request failed: {str(e)}"
)
async def _record_revocation_event(self, quarantine_entry: QuarantineEntry, response: RevocationResponse):
"""Record revocation event in the database"""
try:
revocation_event = RevocationEvent(
quarantine_id=quarantine_entry.id,
secret_type=quarantine_entry.secret_type,
revocation_method=response.method,
status='success' if response.success else 'failed',
response_data=response.response_data,
timestamp=datetime.now()
)
await self.quarantine.record_revocation(revocation_event)
except Exception as e:
logger.error(f"Failed to record revocation event: {e}")
def _update_stats(self, secret_type: str, success: bool):
"""Update revocation statistics"""
self.stats['total_revocations'] += 1
if success:
self.stats['successful_revocations'] += 1
else:
self.stats['failed_revocations'] += 1
# Update by-type stats
if secret_type not in self.stats['revocations_by_type']:
self.stats['revocations_by_type'][secret_type] = {
'total': 0,
'successful': 0,
'failed': 0
}
type_stats = self.stats['revocations_by_type'][secret_type]
type_stats['total'] += 1
if success:
type_stats['successful'] += 1
else:
type_stats['failed'] += 1
async def test_webhook_endpoint(self, secret_type: str) -> Dict[str, Any]:
"""Test a webhook endpoint with a test payload"""
webhook_url = self.webhook_config.get(secret_type)
if not webhook_url:
return {
'success': False,
'error': f'No webhook configured for {secret_type}'
}
test_payload = {
'event': 'webhook_test',
'secret_type': secret_type,
'test': True,
'timestamp': datetime.now().isoformat()
}
try:
response = await self._send_webhook_request(webhook_url, test_payload, 'test')
return {
'success': response.success,
'method': response.method,
'response_data': response.response_data,
'error': response.error_message
}
except Exception as e:
return {
'success': False,
'error': str(e)
}
def get_stats(self) -> Dict[str, Any]:
"""Get revocation statistics"""
current_time = datetime.now()
uptime_hours = (current_time - self.stats['last_reset']).total_seconds() / 3600
stats = self.stats.copy()
stats.update({
'uptime_hours': round(uptime_hours, 2),
'success_rate': (
self.stats['successful_revocations'] / max(1, self.stats['total_revocations'])
) * 100,
'configured_webhooks': list(self.webhook_config.keys())
})
return stats
def reset_stats(self):
"""Reset statistics counters"""
self.stats = {
'total_revocations': 0,
'successful_revocations': 0,
'failed_revocations': 0,
'revocations_by_type': {},
'last_reset': datetime.now()
}
logger.info("SecretRevoker statistics reset")

33
modules/shhh/config.yaml Normal file
View File

@@ -0,0 +1,33 @@
# Configuration for the SHHH Secrets Sentinel
# -- File Paths --
# Path to the primary, raw hypercore log to be monitored.
primary_log_path: '/home/tony/AI/projects/chorus.services/modules/shhh/primary.log'
# Path where the sanitized sister hypercore log will be written.
sanitized_log_path: '/home/tony/AI/projects/chorus.services/modules/shhh/sanitized.log'
# Path to the YAML file containing regex patterns for secret detection.
patterns_file: 'patterns.yaml'
# Path to the system prompt file for the LLM agent.
shhh_agent_prompt_file: 'SHHH_SECRETS_SENTINEL_AGENT_PROMPT.md'
# -- Database --
# Connection string for the PostgreSQL database used for quarantining secrets.
# Format: postgresql://user:password@host:port/database
database_url: 'postgresql://shhh:password@localhost:5432/shhh_sentinel'
# -- LLM Analyzer (Ollama) --
# The API endpoint for the Ollama instance.
ollama_endpoint: 'http://localhost:11434/api/generate'
# The name of the model to use from Ollama (e.g., llama3, codellama).
ollama_model: 'llama3'
# The confidence score threshold for regex matches.
# Matches with confidence >= this value will be quarantined immediately, skipping the LLM.
# Matches with confidence < this value will be sent to the LLM for verification.
llm_confidence_threshold: 0.90

View File

@@ -0,0 +1,6 @@
# SHHH Core Module
"""
Core components for the SHHH Secrets Sentinel system.
"""
__version__ = "1.0.0"

View File

@@ -0,0 +1,52 @@
import re
import yaml
from pathlib import Path
class SecretDetector:
"""
A simplified secret detection engine using configurable regex patterns.
It scans text for secrets, redacts them, and provides metadata.
"""
def __init__(self, patterns_file: str = "patterns.yaml"):
self.patterns_file = Path(patterns_file)
self.patterns = self._load_patterns()
def _load_patterns(self) -> dict:
"""Load detection patterns from YAML configuration."""
try:
with open(self.patterns_file, 'r') as f:
config = yaml.safe_load(f)
patterns = config.get('patterns', {})
# Pre-compile regex for efficiency
for name, props in patterns.items():
if props.get('active', True):
props['compiled_regex'] = re.compile(props['regex'])
return patterns
except Exception as e:
print(f"[ERROR] Failed to load patterns from {self.patterns_file}: {e}")
return {}
def scan(self, text: str) -> list[dict]:
"""Scans text and returns a list of found secrets with metadata."""
matches = []
for pattern_name, pattern in self.patterns.items():
if pattern.get('active', True) and 'compiled_regex' in pattern:
                # Use finditer so every occurrence is reported, not just the first.
                for regex_match in pattern['compiled_regex'].finditer(text):
                    matches.append({
                        "secret_type": pattern_name,
                        "value": regex_match.group(0),
                        "confidence": pattern.get("confidence", 0.8),
                        "severity": pattern.get("severity", "MEDIUM")
                    })
return matches
def redact(self, text: str, secret_value: str) -> str:
"""Redacts a specific secret value within a string."""
# Ensure we don't reveal too much for very short secrets
if len(secret_value) < 8:
return text.replace(secret_value, "[REDACTED]")
redacted_str = secret_value[:4] + "****" + secret_value[-4:]
return text.replace(secret_value, f"[REDACTED:{redacted_str}]")

View File

@@ -0,0 +1,35 @@
import asyncio
from datetime import datetime
class LogEntry:
"""A mock log entry object for testing purposes."""
def __init__(self, content):
self.content = content
self.timestamp = datetime.now()
# Add other fields as needed to match the processor's expectations
self.source_agent = "mock_agent"
self.message_type = "mock_message"
self.metadata = {}
self.is_bzzz_message = False
self.bzzz_message_id = None
class HypercoreReader:
"""
A simplified, mock HypercoreReader that reads from a plain text file
to simulate a stream of log entries for testing.
"""
def __init__(self, log_path: str, **kwargs):
self.log_path = log_path
async def stream_entries(self):
"""
An async generator that yields log entries from a text file.
"""
try:
with open(self.log_path, 'r') as f:
for line in f:
yield LogEntry(line.strip())
await asyncio.sleep(0.01) # Simulate async behavior
except FileNotFoundError:
print(f"[ERROR] Hypercore log file not found at: {self.log_path}")
return

View File

@@ -0,0 +1,44 @@
import requests
import json
class LLMAnalyzer:
"""Analyzes text for secrets using a local LLM via Ollama."""
def __init__(self, endpoint: str, model: str, system_prompt: str):
self.endpoint = endpoint
self.model = model
self.system_prompt = system_prompt
def analyze(self, text: str) -> dict:
"""
Sends text to the Ollama API for analysis and returns a structured JSON response.
Returns:
A dictionary like:
{
"secret_found": bool,
"secret_type": str,
"confidence_score": float,
"severity": str
}
Returns a default "not found" response on error.
"""
prompt = f"Log entry: \"{text}\"\n\nAnalyze this for secrets and respond with only the required JSON."
payload = {
"model": self.model,
"system": self.system_prompt,
"prompt": prompt,
"format": "json",
"stream": False
}
try:
response = requests.post(self.endpoint, json=payload, timeout=15)
response.raise_for_status()
# The response from Ollama is a JSON string, which needs to be parsed.
analysis = json.loads(response.json().get("response", "{}"))
return analysis
except (requests.exceptions.RequestException, json.JSONDecodeError) as e:
print(f"[ERROR] LLMAnalyzer failed: {e}")
# Fallback: If LLM fails, assume no secret was found to avoid blocking the pipeline.
return {"secret_found": False}

View File

@@ -0,0 +1,22 @@
from datetime import datetime
class QuarantineManager:
"""
A simplified, mock QuarantineManager for testing purposes.
It prints quarantined messages to the console instead of saving to a database.
"""
def __init__(self, database_url: str, **kwargs):
print(f"[MockQuarantine] Initialized with db_url: {database_url}")
def quarantine_message(self, message, secret_type: str, severity: str, redacted_content: str):
"""
Prints a quarantined message to the console.
"""
print("\n--- QUARANTINE ALERT ---")
print(f"Timestamp: {datetime.now().isoformat()}")
print(f"Severity: {severity}")
print(f"Secret Type: {secret_type}")
print(f"Original Content (from mock): {message.content}")
print(f"Redacted Content: {redacted_content}")
print("------------------------\n")

View File

@@ -0,0 +1,16 @@
class SanitizedWriter:
"""Writes log entries to the sanitized sister hypercore log."""
def __init__(self, sanitized_log_path: str):
self.log_path = sanitized_log_path
# Placeholder for hypercore writing logic. For now, we'll append to a file.
        self.log_file = open(self.log_path, "a", encoding="utf-8")
def write(self, log_entry: str):
"""Writes a single log entry to the sanitized stream."""
self.log_file.write(log_entry + "\n")
self.log_file.flush()
def close(self):
self.log_file.close()

View File

@@ -0,0 +1,4 @@
# SHHH Integrations Module
"""
Integration components for BZZZ network and external systems.
"""

View File

@@ -0,0 +1,369 @@
"""
BZZZ Message Interceptor for SHHH Secrets Sentinel
Intercepts and validates BZZZ P2P messages before network propagation.
"""
import asyncio
import json
import time
from typing import Dict, Any, Optional, Set, Callable
from dataclasses import dataclass
from datetime import datetime, timedelta
import structlog
from ..core.hypercore_reader import BzzzMessage
from ..core.detector import SecretDetector, DetectionResult
from ..core.quarantine import QuarantineManager
logger = structlog.get_logger()
@dataclass
class BlockedMessage:
"""Represents a blocked BZZZ message"""
message_id: str
sender_agent: str
block_reason: str
secret_types: list
timestamp: datetime
quarantine_id: Optional[int] = None
class BzzzInterceptor:
"""
Intercepts BZZZ P2P messages before transmission to prevent secret leakage.
Integrates with the BZZZ network layer to scan messages in real-time.
"""
def __init__(
self,
detector: SecretDetector,
quarantine_manager: QuarantineManager,
bzzz_config: Dict[str, Any] = None
):
self.detector = detector
self.quarantine = quarantine_manager
self.bzzz_config = bzzz_config or {}
# Message blocking state
self.blocked_messages: Dict[str, BlockedMessage] = {}
self.message_hooks: Set[Callable] = set()
self.is_active = False
# Statistics
self.stats = {
'total_scanned': 0,
'secrets_detected': 0,
'messages_blocked': 0,
'false_positives': 0,
'last_reset': datetime.now()
}
logger.info("Initialized BzzzInterceptor")
async def start(self):
"""Start the BZZZ message interception service"""
self.is_active = True
logger.info("BZZZ Interceptor started - all outgoing messages will be scanned")
async def stop(self):
"""Stop the BZZZ message interception service"""
self.is_active = False
logger.info("BZZZ Interceptor stopped")
def install_message_hook(self, hook_function: Callable):
"""Install a message hook for BZZZ network integration"""
self.message_hooks.add(hook_function)
logger.info(f"Installed BZZZ message hook: {hook_function.__name__}")
def remove_message_hook(self, hook_function: Callable):
"""Remove a message hook"""
self.message_hooks.discard(hook_function)
logger.info(f"Removed BZZZ message hook: {hook_function.__name__}")
async def intercept_outgoing_message(self, message: BzzzMessage) -> bool:
"""
Intercept and scan an outgoing BZZZ message.
Returns True if message should be allowed, False if blocked.
"""
if not self.is_active:
return True # Pass through if interceptor is inactive
start_time = time.time()
self.stats['total_scanned'] += 1
try:
# Scan message for secrets
detection_result = self.detector.scan_bzzz_message(message)
if detection_result.has_secrets:
await self._handle_secret_detection(message, detection_result)
return False # Block message
# Message is clean, allow transmission
processing_time = (time.time() - start_time) * 1000
logger.debug(
f"BZZZ message scanned clean",
message_id=message.message_id,
sender=message.sender_agent,
processing_time_ms=processing_time
)
return True
except Exception as e:
logger.error(f"Error intercepting BZZZ message: {e}")
# On error, default to blocking for security
await self._block_message_on_error(message, str(e))
return False
async def _handle_secret_detection(self, message: BzzzMessage, detection_result: DetectionResult):
"""Handle detection of secrets in a BZZZ message"""
self.stats['secrets_detected'] += 1
self.stats['messages_blocked'] += 1
# Extract secret types for blocking record
secret_types = [match.secret_type for match in detection_result.matches]
# Quarantine the detection result
quarantine_entry = await self.quarantine.quarantine_detection(detection_result)
# Create blocked message record
blocked_msg = BlockedMessage(
message_id=message.message_id,
sender_agent=message.sender_agent,
block_reason=f"Secrets detected: {', '.join(secret_types)}",
secret_types=secret_types,
timestamp=datetime.now(),
quarantine_id=quarantine_entry.id
)
self.blocked_messages[message.message_id] = blocked_msg
# Notify BZZZ network layer
await self._notify_message_blocked(message, blocked_msg)
logger.critical(
f"BLOCKED BZZZ message containing secrets",
message_id=message.message_id,
sender=message.sender_agent,
recipient=message.recipient_agent,
secret_types=secret_types,
severity=detection_result.max_severity,
quarantine_id=quarantine_entry.id
)
async def _block_message_on_error(self, message: BzzzMessage, error_msg: str):
"""Block a message due to processing error"""
self.stats['messages_blocked'] += 1
blocked_msg = BlockedMessage(
message_id=message.message_id,
sender_agent=message.sender_agent,
block_reason=f"Processing error: {error_msg}",
secret_types=[],
timestamp=datetime.now()
)
self.blocked_messages[message.message_id] = blocked_msg
await self._notify_message_blocked(message, blocked_msg)
logger.error(
f"BLOCKED BZZZ message due to error",
message_id=message.message_id,
sender=message.sender_agent,
error=error_msg
)
async def _notify_message_blocked(self, message: BzzzMessage, blocked_msg: BlockedMessage):
"""Notify BZZZ network and sender about blocked message"""
notification = {
'event': 'message_blocked',
'message_id': message.message_id,
'sender_agent': message.sender_agent,
'recipient_agent': message.recipient_agent,
'block_reason': blocked_msg.block_reason,
'secret_types': blocked_msg.secret_types,
'timestamp': blocked_msg.timestamp.isoformat(),
'quarantine_id': blocked_msg.quarantine_id
}
# Notify all registered hooks
for hook in self.message_hooks:
try:
await self._call_hook_safely(hook, 'message_blocked', notification)
except Exception as e:
logger.warning(f"Hook {hook.__name__} failed: {e}")
# Send notification back to sender agent
await self._notify_sender_agent(message.sender_agent, notification)
async def _call_hook_safely(self, hook: Callable, event_type: str, data: Dict[str, Any]):
"""Safely call a hook function with error handling"""
try:
if asyncio.iscoroutinefunction(hook):
await hook(event_type, data)
else:
hook(event_type, data)
except Exception as e:
logger.warning(f"Hook {hook.__name__} failed: {e}")
async def _notify_sender_agent(self, sender_agent: str, notification: Dict[str, Any]):
"""Send notification to the sender agent about blocked message"""
try:
# This would integrate with the BZZZ network's agent communication system
# For now, we'll log the notification
logger.info(
f"Notifying agent about blocked message",
agent=sender_agent,
message_id=notification['message_id'],
reason=notification['block_reason']
)
# TODO: Implement actual agent notification via BZZZ network
# This might involve:
# - Sending a system message back to the agent
# - Updating agent's message status
# - Triggering agent's error handling workflow
except Exception as e:
logger.error(f"Failed to notify sender agent {sender_agent}: {e}")
def is_message_blocked(self, message_id: str) -> Optional[BlockedMessage]:
"""Check if a message is blocked"""
return self.blocked_messages.get(message_id)
def unblock_message(self, message_id: str, reviewer: str, reason: str) -> bool:
"""Unblock a previously blocked message (for false positives)"""
if message_id not in self.blocked_messages:
return False
blocked_msg = self.blocked_messages[message_id]
# Mark as false positive in stats
self.stats['false_positives'] += 1
# Remove from blocked messages
del self.blocked_messages[message_id]
logger.info(
f"Unblocked BZZZ message",
message_id=message_id,
reviewer=reviewer,
reason=reason,
original_block_reason=blocked_msg.block_reason
)
return True
def get_blocked_messages(self, limit: int = 100) -> list[BlockedMessage]:
"""Get list of recently blocked messages"""
blocked_list = list(self.blocked_messages.values())
blocked_list.sort(key=lambda x: x.timestamp, reverse=True)
return blocked_list[:limit]
def get_stats(self) -> Dict[str, Any]:
"""Get interceptor statistics"""
current_time = datetime.now()
uptime_hours = (current_time - self.stats['last_reset']).total_seconds() / 3600
stats = self.stats.copy()
stats.update({
'uptime_hours': round(uptime_hours, 2),
'is_active': self.is_active,
'blocked_messages_count': len(self.blocked_messages),
'detection_rate': (
self.stats['secrets_detected'] / max(1, self.stats['total_scanned'])
) * 100,
'false_positive_rate': (
self.stats['false_positives'] / max(1, self.stats['secrets_detected'])
) * 100 if self.stats['secrets_detected'] > 0 else 0
})
return stats
def reset_stats(self):
"""Reset statistics counters"""
self.stats = {
'total_scanned': 0,
'secrets_detected': 0,
'messages_blocked': 0,
'false_positives': 0,
'last_reset': datetime.now()
}
logger.info("BZZZ Interceptor statistics reset")
async def cleanup_old_blocked_messages(self, hours: int = 24):
"""Clean up old blocked message records"""
cutoff_time = datetime.now() - timedelta(hours=hours)
old_messages = [
msg_id for msg_id, blocked_msg in self.blocked_messages.items()
if blocked_msg.timestamp < cutoff_time
]
for msg_id in old_messages:
del self.blocked_messages[msg_id]
if old_messages:
logger.info(f"Cleaned up {len(old_messages)} old blocked message records")
return len(old_messages)
class BzzzNetworkAdapter:
"""
Adapter to integrate BzzzInterceptor with the actual BZZZ network layer.
This would be customized based on the BZZZ implementation details.
"""
def __init__(self, interceptor: BzzzInterceptor):
self.interceptor = interceptor
self.original_send_function = None
def install_interceptor(self, bzzz_network_instance):
"""Install interceptor into BZZZ network layer"""
# This would need to be customized based on actual BZZZ implementation
# Example pattern:
# Store original send function
self.original_send_function = bzzz_network_instance.send_message
# Replace with intercepting version
bzzz_network_instance.send_message = self._intercepting_send_message
logger.info("BzzzInterceptor installed into BZZZ network layer")
async def _intercepting_send_message(self, message_data: Dict[str, Any]):
"""Intercepting version of BZZZ send_message function"""
try:
# Convert to BzzzMessage format
bzzz_message = self._convert_to_bzzz_message(message_data)
# Check with interceptor
should_allow = await self.interceptor.intercept_outgoing_message(bzzz_message)
if should_allow:
# Call original send function
return await self.original_send_function(message_data)
else:
# Message was blocked
raise Exception(f"Message blocked by security interceptor: {bzzz_message.message_id}")
except Exception as e:
logger.error(f"Error in intercepting send: {e}")
raise
def _convert_to_bzzz_message(self, message_data: Dict[str, Any]) -> BzzzMessage:
"""Convert BZZZ network message format to BzzzMessage"""
# This would need to be customized based on actual BZZZ message format
return BzzzMessage(
message_id=message_data.get('id', f"auto_{int(time.time())}"),
sender_agent=message_data.get('sender', 'unknown'),
recipient_agent=message_data.get('recipient'),
message_type=message_data.get('type', 'unknown'),
payload=json.dumps(message_data.get('payload', message_data)),
timestamp=datetime.now(),
network_metadata=message_data
)

181
modules/shhh/main.py Normal file
View File

@@ -0,0 +1,181 @@
#!/usr/bin/env python3
"""
SHHH Secrets Sentinel - Main Entry Point
Production-ready secrets detection and monitoring system for CHORUS Services.
"""
import asyncio
import argparse
import sys
import yaml
from pathlib import Path
import structlog
from typing import Dict, Any
# Updated imports to bring in the new and modified components
from pipeline.processor import MessageProcessor
from core.hypercore_reader import HypercoreReader
from core.detector import SecretDetector
from core.llm_analyzer import LLMAnalyzer
from core.quarantine import QuarantineManager
from core.sanitized_writer import SanitizedWriter
def setup_logging(log_level: str = "INFO", structured: bool = True):
"""Configure structured logging"""
structlog.configure(
processors=[
structlog.stdlib.filter_by_level,
structlog.stdlib.add_logger_name,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.TimeStamper(fmt="iso"),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.processors.UnicodeDecoder(),
structlog.processors.JSONRenderer() if structured else structlog.dev.ConsoleRenderer(),
],
context_class=dict,
logger_factory=structlog.stdlib.LoggerFactory(),
wrapper_class=structlog.stdlib.BoundLogger,
cache_logger_on_first_use=True,
)
def load_config(config_path: str) -> Dict[str, Any]:
"""Load configuration from YAML file"""
try:
with open(config_path, 'r') as f:
config = yaml.safe_load(f)
return config
except FileNotFoundError:
print(f"Configuration file not found: {config_path}, using defaults.")
return get_default_config()
except yaml.YAMLError as e:
print(f"Error parsing configuration file: {e}")
sys.exit(1)
def get_default_config() -> Dict[str, Any]:
"""Get default configuration, updated for the new architecture."""
return {
'primary_log_path': '/path/to/primary/hypercore.log',
'sanitized_log_path': '/path/to/sanitized/hypercore.log',
'database_url': 'postgresql://shhh:password@localhost:5432/shhh_sentinel',
'patterns_file': 'patterns.yaml',
'ollama_endpoint': 'http://localhost:11434/api/generate',
'ollama_model': 'llama3',
'llm_confidence_threshold': 0.90,
'shhh_agent_prompt_file': 'SHHH_SECRETS_SENTINEL_AGENT_PROMPT.md'
}
async def run_monitor_mode(config: Dict[str, Any]):
"""Run in monitoring mode with the new hybrid pipeline."""
logger = structlog.get_logger()
logger.info("Starting SHHH in monitor mode with hybrid pipeline...")
writer = None
try:
# 1. Load System Prompt for LLM
try:
with open(config['shhh_agent_prompt_file'], "r") as f:
ollama_system_prompt = f.read()
except FileNotFoundError:
logger.error(f"LLM prompt file not found at {config['shhh_agent_prompt_file']}. Aborting.")
return
# 2. Instantiation of components
# Note: HypercoreReader and QuarantineManager might need async initialization
# which is not shown here for simplicity, following the plan.
reader = HypercoreReader(config['primary_log_path'])
detector = SecretDetector(config['patterns_file'])
llm_analyzer = LLMAnalyzer(config['ollama_endpoint'], config['ollama_model'], ollama_system_prompt)
quarantine = QuarantineManager(config['database_url'])
writer = SanitizedWriter(config['sanitized_log_path'])
processor = MessageProcessor(
reader=reader,
detector=detector,
llm_analyzer=llm_analyzer,
quarantine=quarantine,
writer=writer,
llm_threshold=config['llm_confidence_threshold']
)
# 3. Execution
logger.info("Starting processor stream...")
await processor.process_stream()
except Exception as e:
logger.error("An error occurred during monitor mode execution.", error=str(e))
finally:
if writer:
writer.close()
logger.info("Monitor mode shut down complete.")
async def run_api_mode(config: Dict[str, Any], host: str, port: int):
"""Run in API mode (dashboard server) - UNCHANGED"""
import uvicorn
from api.main import app
app.state.config = config
uvicorn_config = uvicorn.Config(app=app, host=host, port=port, log_level="info", access_log=True)
server = uvicorn.Server(uvicorn_config)
await server.serve()
async def run_test_mode(config: Dict[str, Any], test_file: str):
"""Run in test mode with sample data - UNCHANGED but may be broken."""
logger = structlog.get_logger()
logger.warning("Test mode may be broken due to recent refactoring.")
# This part of the code would need to be updated to work with the new SecretDetector.
# For now, it remains as it was.
from core.detector import SecretDetector
from datetime import datetime
detector = SecretDetector(config['patterns_file'])
logger.info("Running SHHH in test mode")
# ... (rest of the test mode logic is likely broken and needs updating)
def main():
"""Main entry point"""
parser = argparse.ArgumentParser(description="SHHH Secrets Sentinel")
parser.add_argument('--config', '-c', default='config.yaml', help='Configuration file path')
parser.add_argument('--mode', '-m', choices=['monitor', 'api', 'test'], default='monitor', help='Operation mode')
parser.add_argument('--log-level', '-l', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], default='INFO', help='Log level')
parser.add_argument('--structured-logs', action='store_true', help='Use structured JSON logging')
parser.add_argument('--host', default='127.0.0.1', help='API server host')
parser.add_argument('--port', '-p', type=int, default=8000, help='API server port')
parser.add_argument('--test-file', help='Test data file for test mode')
parser.add_argument('--version', '-v', action='version', version='SHHH Secrets Sentinel 1.1.0 (Hybrid)')
args = parser.parse_args()
setup_logging(args.log_level, args.structured_logs)
logger = structlog.get_logger()
config = load_config(args.config)
logger.info("Starting SHHH Secrets Sentinel", mode=args.mode, config_file=args.config)
try:
if args.mode == 'monitor':
asyncio.run(run_monitor_mode(config))
elif args.mode == 'api':
asyncio.run(run_api_mode(config, args.host, args.port))
elif args.mode == 'test':
asyncio.run(run_test_mode(config, args.test_file))
except KeyboardInterrupt:
logger.info("Shutting down due to keyboard interrupt.")
except Exception as e:
logger.error("Application failed", error=str(e))
sys.exit(1)
logger.info("SHHH Secrets Sentinel stopped.")
if __name__ == '__main__':
main()

121
modules/shhh/patterns.yaml Normal file
View File

@@ -0,0 +1,121 @@
# SHHH Secrets Detection Patterns
# Configuration for the Secrets Sentinel monitoring system
patterns:
AWS_ACCESS_KEY:
regex: "AKIA[0-9A-Z]{16}"
severity: "CRITICAL"
confidence: 0.95
active: true
description: "AWS Access Key ID"
remediation: "Revoke via AWS IAM immediately"
AWS_SECRET_KEY:
regex: "[A-Za-z0-9/+=]{40}"
severity: "CRITICAL"
confidence: 0.85
active: true
description: "AWS Secret Access Key"
remediation: "Revoke via AWS IAM immediately"
context_required: true # Requires context analysis
PRIVATE_KEY:
regex: "-----BEGIN [A-Z ]*PRIVATE KEY-----"
severity: "CRITICAL"
confidence: 0.98
active: true
description: "Private Key (RSA, SSH, etc.)"
remediation: "Rotate key immediately"
GITHUB_TOKEN:
regex: "ghp_[0-9A-Za-z]{36}"
severity: "HIGH"
confidence: 0.92
active: true
description: "GitHub Personal Access Token"
remediation: "Revoke via GitHub settings"
GITHUB_OAUTH:
regex: "gho_[0-9A-Za-z]{36}"
severity: "HIGH"
confidence: 0.92
active: true
description: "GitHub OAuth Token"
remediation: "Revoke via GitHub app settings"
SLACK_TOKEN:
regex: "xox[baprs]-[0-9A-Za-z-]{10,48}"
severity: "HIGH"
confidence: 0.90
active: true
description: "Slack Bot/User Token"
remediation: "Revoke via Slack Admin API"
JWT_TOKEN:
regex: "eyJ[A-Za-z0-9_-]+?\\.[A-Za-z0-9_-]+?\\.[A-Za-z0-9_-]+?"
severity: "MEDIUM"
confidence: 0.85
active: true
description: "JSON Web Token"
remediation: "Invalidate token and rotate signing key"
GOOGLE_API_KEY:
regex: "AIza[0-9A-Za-z\\-_]{35}"
severity: "HIGH"
confidence: 0.90
active: true
description: "Google API Key"
remediation: "Revoke via Google Cloud Console"
DOCKER_TOKEN:
regex: "dckr_pat_[a-zA-Z0-9_-]{32,}"
severity: "MEDIUM"
confidence: 0.88
active: true
description: "Docker Personal Access Token"
remediation: "Revoke via Docker Hub settings"
GENERIC_API_KEY:
regex: "[Aa][Pp][Ii]_?[Kk][Ee][Yy].*['\"][0-9a-zA-Z]{32,}['\"]"
severity: "MEDIUM"
confidence: 0.70
active: true
description: "Generic API Key Pattern"
remediation: "Verify and revoke if legitimate"
# Pattern exceptions - known test/dummy values to ignore
exceptions:
test_patterns:
- "AKIA-TESTKEY-123"
- "AKIAIOSFODNN7EXAMPLE"
- "xoxb-test-token"
- "ghp_test123456789012345678901234567890"
- "-----BEGIN EXAMPLE PRIVATE KEY-----"
development_indicators:
- "test"
- "example"
- "demo"
- "mock"
- "fake"
- "dummy"
# Quarantine settings
quarantine:
high_severity_auto_quarantine: true
medium_severity_review_required: true
retention_days: 90
max_entries: 10000
# Alert settings
alerts:
webhook_timeout_seconds: 5
retry_attempts: 3
retry_delay_seconds: 2
# Revocation hooks
revocation_hooks:
AWS_ACCESS_KEY: "https://security.chorus.services/hooks/aws-revoke"
GITHUB_TOKEN: "https://security.chorus.services/hooks/github-revoke"
SLACK_TOKEN: "https://security.chorus.services/hooks/slack-revoke"
GOOGLE_API_KEY: "https://security.chorus.services/hooks/google-revoke"

View File

@@ -0,0 +1,4 @@
# SHHH Pipeline Module
"""
Main processing pipeline for the SHHH Secrets Sentinel system.
"""

View File

@@ -0,0 +1,66 @@
import asyncio
from core.hypercore_reader import HypercoreReader
from core.detector import SecretDetector
from core.llm_analyzer import LLMAnalyzer
from core.quarantine import QuarantineManager
from core.sanitized_writer import SanitizedWriter
class MessageProcessor:
def __init__(self, reader: HypercoreReader, detector: SecretDetector, llm_analyzer: LLMAnalyzer, quarantine: QuarantineManager, writer: SanitizedWriter, llm_threshold: float):
self.reader = reader
self.detector = detector
self.llm_analyzer = llm_analyzer
self.quarantine = quarantine
self.writer = writer
self.llm_threshold = llm_threshold # e.g., 0.90
async def process_stream(self):
"""Main processing loop for the hybrid detection model."""
async for entry in self.reader.stream_entries():
# Stage 1: Fast Regex Scan
regex_matches = self.detector.scan(entry.content)
if not regex_matches:
# No secrets found, write original entry to sanitized log
self.writer.write(entry.content)
continue
# A potential secret was found. Default to sanitized, but may be quarantined.
sanitized_content = entry.content
should_quarantine = False
confirmed_secret = None
for match in regex_matches:
# High-confidence regex matches trigger immediate quarantine, skipping LLM.
if match['confidence'] >= self.llm_threshold:
should_quarantine = True
confirmed_secret = match
break # One high-confidence match is enough
# Stage 2: Low-confidence matches go to LLM for verification.
llm_result = self.llm_analyzer.analyze(entry.content)
if llm_result.get("secret_found"):
should_quarantine = True
# Prefer LLM's classification but use regex value for redaction
confirmed_secret = {
"secret_type": llm_result.get("secret_type", match['secret_type']),
"value": match['value'],
"severity": llm_result.get("severity", match['severity'])
}
break
if should_quarantine and confirmed_secret:
# A secret is confirmed. Redact, quarantine, and alert.
sanitized_content = self.detector.redact(entry.content, confirmed_secret['value'])
self.quarantine.quarantine_message(
message=entry,
secret_type=confirmed_secret['secret_type'],
severity=confirmed_secret['severity'],
redacted_content=sanitized_content
)
# Potentially trigger alerts here as well
print(f"[ALERT] Confirmed secret {confirmed_secret['secret_type']} found and quarantined.")
# Write the (potentially redacted) content to the sanitized log
self.writer.write(sanitized_content)

View File

@@ -0,0 +1,15 @@
# SHHH Secrets Sentinel Dependencies
fastapi==0.104.1
uvicorn[standard]==0.24.0
psycopg2-binary==2.9.9
pydantic==2.5.0
requests==2.31.0
pyyaml==6.0.1
redis==5.0.1
asyncio-mqtt==0.15.1
watchdog==3.0.0
prometheus-client==0.19.0
python-multipart==0.0.6
aiofiles==23.2.1
hypercorn==0.15.0
structlog==23.2.0

View File

@@ -0,0 +1,995 @@
Here's a **clean, production-ready system prompt** for that agent:
---
**🛡️ System Prompt: “Secrets Sentinel” Agent**
> **Role & Mission**:
> You are the **Secrets Sentinel**, an autonomous security agent tasked with **monitoring all incoming log entries** for any potential leaks of **API keys, passwords, tokens, or other sensitive credentials**. Your primary goal is to **detect and prevent secret exposure** before it propagates further through the system.
>
> **Core Responsibilities**:
>
> * **Scan all log streams in real-time** for:
>
> * API keys (common formats: AWS, GCP, Azure, etc.)
> * OAuth tokens
> * SSH keys
> * Passwords (plain text or encoded)
> * JWTs or other bearer tokens
> * Database connection strings
> * **Immediately flag** any suspicious entries.
> * **Classify severity** (e.g., HIGH: AWS root key; MEDIUM: temporary token).
> * **Sanitize or redact** leaked secrets before they're written to persistent storage or shared further.
> * **Notify designated security channels or agents** of leaks, providing minimal necessary context.
>
> **Guidelines**:
>
> * Never expose the full secret in your alerts — redact most of it (e.g., `AKIA************XYZ`).
> * Be cautious of **false positives** (e.g., test data, dummy keys); err on the side of safety but include a “confidence score.”
> * Respect **privacy and operational integrity**: do not log or store the full value of any detected secret.
> * Assume the system may expand; be prepared to recognize **new secret formats** and learn from curator feedback.
>
> **Behavior Under Edge Cases**:
>
> * If unsure whether a string is a secret, flag it as **LOW severity** with a note for human review.
> * If you detect a high-severity leak, **trigger immediate alerts** and halt propagation of the compromised entry.
>
> **Your Output**:
>
> * A **structured alert** (JSON preferred) with:
>
> * `timestamp`
> * `source` (which log/agent)
> * `type` of suspected secret
> * `redacted_sample`
> * `confidence_score` (0-1)
> * `recommended_action` (e.g., “revoke key,” “rotate password,” “ignore dummy”)
>
> **Tone & Style**:
>
> * Precise, neutral, security-minded.
> * Avoid speculation beyond what you can confidently identify.
---
## 📂 **Version-Controlled `patterns.yaml` Format**
This lets you add/update/remove detection patterns **without touching code**.
```yaml
version: 1.2
last_updated: 2025-08-02
patterns:
AWS_ACCESS_KEY:
regex: "AKIA[0-9A-Z]{16}"
description: "AWS Access Key ID"
severity: HIGH
confidence: 0.99
active: true
AWS_SECRET_KEY:
regex: "(?i)aws(.{0,20})?(?-i)['\"][0-9a-zA-Z\/+]{40}['\"]"
description: "AWS Secret Key"
severity: HIGH
confidence: 0.99
active: true
GITHUB_TOKEN:
regex: "gh[pousr]_[0-9A-Za-z]{36}"
description: "GitHub Personal Access Token"
severity: HIGH
confidence: 0.97
active: true
JWT:
regex: "eyJ[A-Za-z0-9_-]+?\\.[A-Za-z0-9._-]+?\\.[A-Za-z0-9._-]+"
description: "JSON Web Token"
severity: MEDIUM
confidence: 0.95
active: true
meta:
allow_feedback_learning: true
require_human_review_above_confidence: 0.8
```
**Advantages:**
- Regexes are editable without code changes.
- Can be versioned in Git for full audit trail.
- Can toggle `active: false` for deprecating broken rules.
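For reference, a minimal loader sketch (the function name is illustrative) that compiles only `active` rules and skips any regex that fails to compile, so one bad pattern can't take down the whole Sentinel:

```python
import re
import yaml

def load_active_patterns(path: str = "patterns.yaml") -> dict:
    """Return {name: props + compiled regex} for every active, valid rule."""
    with open(path, "r") as f:
        config = yaml.safe_load(f)
    active = {}
    for name, props in config.get("patterns", {}).items():
        if not props.get("active", True):
            continue  # deactivated rules stay in the file but are skipped at runtime
        try:
            active[name] = {**props, "compiled": re.compile(props["regex"])}
        except re.error as e:
            # A broken regex disables one rule instead of crashing the scanner
            print(f"[PATTERNS] Skipping {name}: invalid regex ({e})")
    return active
```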
---
## 🖼 **Flow Diagram (Secrets Sentinel)**
**Secrets Flow**
```
                  ┌───────────────┐
  Logs Stream →   │    Secrets    │
                  │   Sentinel    │
                  └───────┬───────┘
              ┌───────────┴───────────┐
              │                       │
        [Quarantine]          [Sanitized Logs]
              │                       │
      ┌───────┴──────┐          ┌─────┴─────┐
      │High Severity │          │ Safe Data │
      │Secrets Only  │          │  Storage  │
      └───────┬──────┘          └─────┬─────┘
              │                       │
     ┌────────┴─────────┐             │
     │ Revocation Hooks │             │
     │ (AWS, GitHub,    │             │
     │  Slack, etc.)    │             │
     └────────┬─────────┘             │
              │                       │
         ┌────┴─────┐                 │
         │ Webhooks │                 │
         │ Key Kill │                 │
         └────┬─────┘                 │
              └───────────┬───────────┘
                          │
                ┌─────────┴─────────┐
                │   Feedback Loop   │
                │  (Curator/Human)  │
                └─────────┬─────────┘
                   ┌──────┴──────┐
                   │ Meta-Learner│
                   │ (new regex) │
                   └──────┬──────┘
                  ┌───────┴──────┐
                  │ patterns.yaml│
                  └──────────────┘
```
---
## 🧪 **Test Harness Script**
This script simulates log scanning, quarantining, and revocation.
```python
import yaml, json, re
from datetime import datetime
# --- Load patterns.yaml ---
with open("patterns.yaml", "r") as f:
patterns_config = yaml.safe_load(f)
PATTERNS = patterns_config["patterns"]
QUARANTINE = []
SANITIZED_LOGS = []
def redact(secret):
    # Keep the first four and last three characters; mask everything else
    if len(secret) <= 8:
        return "*" * len(secret)  # too short to partially reveal safely
    return secret[:4] + "*" * (len(secret) - 7) + secret[-3:]
def scan_log(log_line, log_id, source_agent):
alerts = []
for secret_type, props in PATTERNS.items():
if not props.get("active", True):
continue
match = re.search(props["regex"], log_line)
if match:
secret = match.group(0)
severity = props["severity"]
alert = {
"timestamp": datetime.utcnow().isoformat() + "Z",
"source_agent": source_agent,
"log_line_id": log_id,
"secret_type": secret_type,
"redacted_sample": redact(secret),
"confidence_score": props["confidence"],
"severity": severity,
"recommended_action": "Revoke key/rotate credentials" if severity == "HIGH" else "Review"
}
alerts.append(alert)
# Quarantine if severity is HIGH
if severity == "HIGH":
quarantine_log(log_line, f"High severity secret detected: {secret_type}")
trigger_revocation(secret_type, redact(secret))
return alerts
def quarantine_log(log_line, reason):
entry = {"timestamp": datetime.utcnow().isoformat() + "Z", "reason": reason, "log_line": log_line}
QUARANTINE.append(entry)
print(f"[QUARANTINE] {reason}")
def trigger_revocation(secret_type, redacted_sample):
# Simulated webhook call
print(f"[REVOCATION] Simulated revocation triggered for {secret_type} ({redacted_sample})")
def process_logs(logs):
for i, log_line in enumerate(logs):
alerts = scan_log(log_line, f"log_{i}", "agent_demo")
if alerts:
print(json.dumps(alerts, indent=2))
else:
SANITIZED_LOGS.append(log_line)
# --- Test Run ---
sample_logs = [
"INFO User logged in successfully",
"WARNING Found AWS key AKIA1234567890ABCD in commit",
"DEBUG JWT detected eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.fake.fake"
]
process_logs(sample_logs)
print("\n--- Quarantine Store ---")
print(json.dumps(QUARANTINE, indent=2))
```
**What this does:**
- Reads `patterns.yaml`
- Scans logs, prints alerts, quarantines high-severity entries
- Simulates revocation calls for AWS/GitHub/Slack
- Keeps sanitized logs separate from quarantined logs
---
## ✅ Next Expansions (Optional)
- 📦 **Redis/DB backend** for QUARANTINE instead of memory.
- 📡 **Real webhook integrations** (AWS STS, GitHub API, Slack API).
- 🧠 **Feedback ingestion module** (e.g., curator submits: `"false_positive": "AWS_ACCESS_KEY"` → adjusts regex in `patterns.yaml`).
- 🔄 **Auto-replay from Hyperlog** so Sentinel can retroactively scan old logs with new regex rules.
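
For the auto-replay idea, a minimal sketch (reusing `scan_log` from the harness above; the plain-text log path is an assumption, since real Hyperlog access would go through its own reader):

```python
def replay_hyperlog(log_path: str):
    """Re-scan historical log lines with the current patterns.yaml rules."""
    with open(log_path, "r") as f:
        for i, line in enumerate(f):
            alerts = scan_log(line.rstrip("\n"), f"replay_{i}", "replay_job")
            if alerts:
                print(f"[REPLAY] Line {i} matched {len(alerts)} rule(s)")
```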
---
🔥 Now for the **production-grade spec**.
---
## 📂 **1⃣ Feedback Ingestion Spec**
This defines how curators/humans give feedback to the Sentinel so it can **update its detection rules (patterns.yaml)** safely.
---
### 🔄 **Feedback Flow**
1. **Curator/Reviewer sees alert** → marks it as:
- `false_positive` (regex over-triggered)
- `missed_secret` (regex failed to detect)
- `uncertain` (needs better regex refinement)
2. **Feedback API** ingests the report:
```json
{
"alert_id": "log_345",
"secret_type": "AWS_ACCESS_KEY",
"feedback_type": "false_positive",
"evidence": "Key was dummy data: TESTKEY123",
"suggested_regex_fix": null
}
```
3. **Meta-Learner** updates rules:
- `false_positive` → adds **exceptions** (e.g., allowlist prefixes like `TESTKEY`).
- `missed_secret` → drafts **new regex** from evidence (using regex generator or LLM).
- Writes changes to **patterns.yaml** under `pending_review`.
4. **Security admin approves** before the new regex is marked `active: true`.
---
### 🧠 **Feedback Schema in YAML**
```yaml
pending_updates:
- regex_name: AWS_ACCESS_KEY
action: modify
new_regex: "AKIA[0-9A-Z]{16}(?!TESTKEY)"
confidence: 0.82
status: "pending human review"
submitted_by: curator_2
timestamp: 2025-08-02T12:40:00Z
```
✅ This keeps **audit trails** & allows **safe hot updates**.
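
A minimal ingestion endpoint sketch, using FastAPI (field names mirror the JSON report above; writing straight back into `patterns.yaml` is an assumption for illustration):

```python
from datetime import datetime, timezone
from typing import Optional

import yaml
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class FeedbackReport(BaseModel):
    alert_id: str
    secret_type: str
    feedback_type: str  # "false_positive" | "missed_secret" | "uncertain"
    evidence: str
    suggested_regex_fix: Optional[str] = None

@app.post("/api/feedback")
def ingest_feedback(report: FeedbackReport):
    """Record the report as a pending_updates entry awaiting human review."""
    entry = {
        "regex_name": report.secret_type,
        "action": "modify" if report.suggested_regex_fix else "flag",
        "new_regex": report.suggested_regex_fix,
        "evidence": report.evidence,
        "status": "pending human review",
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    with open("patterns.yaml") as f:
        config = yaml.safe_load(f)
    config.setdefault("pending_updates", []).append(entry)
    with open("patterns.yaml", "w") as f:
        yaml.safe_dump(config, f, sort_keys=False)
    return {"status": "queued", "entry": entry}
```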
---
## ⚙️ **2⃣ Real AWS/GitHub Webhook Payload Templates**
These are **example POST payloads** your Sentinel would send when it detects a leaked secret.
---
### 🔐 **AWS Access Key Revocation**
**Endpoint:**
`POST https://security.example.com/hooks/aws-revoke`
**Payload:**
```json
{
"event": "secret_leak_detected",
"secret_type": "AWS_ACCESS_KEY",
"redacted_key": "AKIA****XYZ",
"log_reference": "hyperlog:58321",
"recommended_action": "Revoke IAM access key immediately",
"severity": "HIGH",
"timestamp": "2025-08-02T12:45:00Z"
}
```
➡ Your security automation would call the AWS CLI or the IAM API:
```bash
aws iam update-access-key --access-key-id <redacted> --status Inactive
aws iam delete-access-key --access-key-id <redacted>
```
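
The same two steps from Python via the IAM API (a boto3 sketch; looking up the owning user and full key ID from your key inventory is assumed, since the Sentinel only ever stores the redacted sample):

```python
import boto3

iam = boto3.client("iam")

def revoke_aws_key(access_key_id: str, user_name: str):
    """Deactivate first, then delete the compromised access key."""
    iam.update_access_key(UserName=user_name, AccessKeyId=access_key_id, Status="Inactive")
    iam.delete_access_key(UserName=user_name, AccessKeyId=access_key_id)
```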
---
### 🐙 **GitHub Token Revocation**
**Endpoint:**
`POST https://security.example.com/hooks/github-revoke`
**Payload:**
```json
{
"event": "secret_leak_detected",
"secret_type": "GITHUB_TOKEN",
"redacted_key": "ghp_****abcd",
"repository": "repo-name",
"log_reference": "hyperlog:58322",
"severity": "HIGH",
"recommended_action": "Invalidate GitHub token via API",
"timestamp": "2025-08-02T12:46:00Z"
}
```
➡ This would tie into GitHub's [token-scanning API](https://docs.github.com/en/developers/overview/secret-scanning) or use PAT revocation.
---
### 💬 **Slack Token Revocation**
**Endpoint:**
`POST https://security.example.com/hooks/slack-revoke`
**Payload:**
```json
{
"event": "secret_leak_detected",
"secret_type": "SLACK_TOKEN",
"redacted_key": "xoxb****hjk",
"workspace": "company-slack",
"log_reference": "hyperlog:58323",
"severity": "HIGH",
"recommended_action": "Revoke Slack bot/user token",
"timestamp": "2025-08-02T12:47:00Z"
}
```
➡ Slack Admin API can be used to **revoke** or **rotate**.
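
For Slack specifically, the `auth.revoke` Web API method invalidates the token it is called with; a sketch, assuming the revocation service can access the leaked token itself:

```python
import requests

def revoke_slack_token(leaked_token: str) -> bool:
    """Revoke a Slack token via the auth.revoke Web API method."""
    r = requests.post(
        "https://slack.com/api/auth.revoke",
        headers={"Authorization": f"Bearer {leaked_token}"},
        timeout=5,
    )
    return r.ok and r.json().get("revoked", False)
```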
---
## 📡 **3⃣ Redis or PostgreSQL Quarantine Store**
Switching from memory to **persistent storage** means quarantined logs survive restarts.
---
### ✅ **Redis Option (Fast, Volatile)**
```python
import redis, json
r = redis.Redis(host='localhost', port=6379, decode_responses=True)
def quarantine_log(log_line, reason):
entry = {"timestamp": datetime.utcnow().isoformat() + "Z", "reason": reason, "log_line": log_line}
r.lpush("quarantine", json.dumps(entry))
print(f"[QUARANTINE] Stored in Redis: {reason}")
```
- 🏎 **Pros:** Fast, easy to scale.
- ⚠️ **Cons:** Volatile unless persisted (RDB/AOF).
---
### ✅ **PostgreSQL Option (Auditable, Durable)**
**Schema:**
```sql
CREATE TABLE quarantine (
id SERIAL PRIMARY KEY,
timestamp TIMESTAMPTZ NOT NULL,
reason TEXT NOT NULL,
log_line TEXT NOT NULL,
reviewed BOOLEAN DEFAULT FALSE
);
```
**Python Insert:**
```python
import psycopg2
conn = psycopg2.connect("dbname=sentinel user=postgres password=secret")
cursor = conn.cursor()
def quarantine_log(log_line, reason):
entry_time = datetime.utcnow().isoformat() + "Z"
cursor.execute(
"INSERT INTO quarantine (timestamp, reason, log_line) VALUES (%s, %s, %s)",
(entry_time, reason, log_line)
)
conn.commit()
print(f"[QUARANTINE] Stored in PostgreSQL: {reason}")
```
**Postgres is better for long-term auditing** — you can run reports like:
- “How many AWS keys leaked this month?”
- “Which agents generated the most HIGH-severity quarantines?”
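
For example, the monthly AWS-key report is a single query (reusing the `cursor` from the snippet above; the `LIKE` filter assumes the `reason` text embeds the secret type, as the harness writes it):

```python
cursor.execute(
    """
    SELECT count(*) FROM quarantine
    WHERE reason LIKE '%AWS_ACCESS_KEY%'
      AND timestamp >= date_trunc('month', now())
    """
)
print(f"AWS keys leaked this month: {cursor.fetchone()[0]}")
```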
---
## 🚀 **What's Next?**
We now have:
- **Detection → Redaction → Quarantine → Revocation → Feedback → Pattern Evolution**
- **patterns.yaml** for versioned regex
- **Webhooks** for real-time secret revocation
- **Persistent quarantine store** (Redis or Postgres)
---
## 🛡️ **Expanded System Prompt for Secrets Sentinel**
> **Role & Mission**:
> You are the **Secrets Sentinel**, a security-focused agent monitoring all log streams for potential leaks of sensitive information (API keys, passwords, tokens, etc.). Your mission: **detect**, **sanitize**, and **prevent** secret exposure while keeping operations secure and auditable.
>
> **Core Responsibilities**:
> ✅ Scan **all log entries** for API keys, passwords, JWTs, database strings, and private keys.
> ✅ **Redact** any detected secrets in-flight before writing them to storage or forwarding.
> ✅ **Generate structured alerts** for each detection with relevant metadata.
> ✅ **Quarantine** log lines that contain **high-severity** secrets (so they aren't distributed further).
> ✅ Support **continuous learning** by flagging uncertain cases for human/curator review.
>
> **Secret Detection Targets**:
>
> - **Cloud Keys** (AWS, GCP, Azure, etc.)
>
> - **OAuth Tokens** (Bearer, Slack, Discord, GitHub, etc.)
>
> - **JWTs** (header.payload.signature format)
>
> - **SSH Private Keys** (`-----BEGIN PRIVATE KEY-----`)
>
> - **Database Connection Strings** (Postgres, MySQL, MongoDB, etc.)
>
> - **Generic Passwords** (detected from common prefixes, e.g. `pwd=`, `password:`).
>
>
> **Detection Rules**:
>
> - Use **regex patterns** for known key formats.
>
> - Score detections with a **confidence metric** (0-1).
>
> - If a string doesn't fully match, classify as **LOW confidence** for review.
>
>
> **Redaction Policy**:
>
> - Always redact most of the secret (`AKIA************XYZ`).
>
> - Never store or transmit the **full secret**.
>
>
> **Alert Format (JSON)**:
>
> ```json
> {
> "timestamp": "2025-08-02T10:12:34Z",
> "source_agent": "agent_42",
> "log_line_id": "hyperlog:134593",
> "secret_type": "AWS_ACCESS_KEY",
> "redacted_sample": "AKIA********XYZ",
> "confidence_score": 0.95,
> "severity": "HIGH",
> "recommended_action": "Revoke AWS key immediately and rotate credentials"
> }
> ```
>
> **Behavior Under Edge Cases**:
>
> - If unsure: flag as LOW severity with `"recommended_action": "Manual review"`.
>
> - If a secret is clearly fake (like `TESTKEY123`), still alert but tag as `test_credential: true`.
>
>
> **Tone & Style**:
>
> - Precise, security-minded, and concise in reporting.
>
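
If you enforce this format in code, a small pydantic model (a sketch; field names follow the JSON above) keeps every producer honest:

```python
from pydantic import BaseModel, Field

class SecretAlert(BaseModel):
    timestamp: str
    source_agent: str
    log_line_id: str
    secret_type: str
    redacted_sample: str
    confidence_score: float = Field(ge=0.0, le=1.0)  # 0-1 per the spec above
    severity: str  # "LOW" | "MEDIUM" | "HIGH"
    recommended_action: str
```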
---
## 📚 **Regex Patterns Library (Starter Set)**
```python
REGEX_PATTERNS = {
"AWS_ACCESS_KEY": r"AKIA[0-9A-Z]{16}",
"AWS_SECRET_KEY": r"(?i)aws(.{0,20})?(?-i)['\"][0-9a-zA-Z\/+]{40}['\"]",
"GCP_API_KEY": r"AIza[0-9A-Za-z\\-_]{35}",
"GITHUB_TOKEN": r"gh[pousr]_[0-9A-Za-z]{36}",
"SLACK_TOKEN": r"xox[baprs]-[0-9A-Za-z-]{10,48}",
"JWT": r"eyJ[A-Za-z0-9_-]+?\.[A-Za-z0-9._-]+?\.[A-Za-z0-9._-]+",
"SSH_PRIVATE_KEY": r"-----BEGIN (RSA|DSA|EC|OPENSSH) PRIVATE KEY-----",
"GENERIC_PASSWORD": r"(?:password|pwd|pass|secret)\s*[:=]\s*['\"]?[^\s'\";]+['\"]?",
"DB_CONN_STRING": r"(postgres|mysql|mongodb|mssql|redis):\/\/[^\s]+"
}
```
---
## 🛠 **Python Skeleton Implementation**
```python
import re
import json
from datetime import datetime
REGEX_PATTERNS = {
"AWS_ACCESS_KEY": r"AKIA[0-9A-Z]{16}",
"AWS_SECRET_KEY": r"(?i)aws(.{0,20})?(?-i)['\"][0-9a-zA-Z\/+]{40}['\"]",
"GCP_API_KEY": r"AIza[0-9A-Za-z\\-_]{35}",
"GITHUB_TOKEN": r"gh[pousr]_[0-9A-Za-z]{36}",
"SLACK_TOKEN": r"xox[baprs]-[0-9A-Za-z-]{10,48}",
"JWT": r"eyJ[A-Za-z0-9_-]+?\.[A-Za-z0-9._-]+?\.[A-Za-z0-9._-]+",
"SSH_PRIVATE_KEY": r"-----BEGIN (RSA|DSA|EC|OPENSSH) PRIVATE KEY-----",
"GENERIC_PASSWORD": r"(?:password|pwd|pass|secret)\s*[:=]\s*['\"]?[^\s'\";]+['\"]?",
"DB_CONN_STRING": r"(postgres|mysql|mongodb|mssql|redis):\/\/[^\s]+"
}
def redact(secret: str) -> str:
"""Redact a secret leaving only first and last 3 chars."""
return secret[:4] + "*" * (len(secret) - 7) + secret[-3:]
def scan_log_line(log_line: str, log_id: str, source_agent: str):
alerts = []
for secret_type, pattern in REGEX_PATTERNS.items():
match = re.search(pattern, log_line)
if match:
secret = match.group(0)
alert = {
"timestamp": datetime.utcnow().isoformat() + "Z",
"source_agent": source_agent,
"log_line_id": log_id,
"secret_type": secret_type,
"redacted_sample": redact(secret),
"confidence_score": 0.95,
"severity": "HIGH" if "PRIVATE_KEY" in secret_type or "AWS" in secret_type else "MEDIUM",
"recommended_action": "Revoke key/rotate credentials"
}
alerts.append(alert)
return alerts
# Example usage:
log = "User pushed code with AWS key: AKIA1234567890ABCD"
alerts = scan_log_line(log, "hyperlog:42", "agent_7")
print(json.dumps(alerts, indent=2))
```
---
## 🚨 **Optional Extensions**
- **Automatic Quarantine** → If severity=HIGH, forward to a “quarantine pipeline” (prevent further log distribution).
- **Secret Revocation Hooks** → Webhooks/scripts to auto-disable compromised keys (AWS IAM revoke, GitHub token reset, etc.).
- **Curator Feedback Loop** → If many false positives occur, agents/curators can “vote” on regex quality → improves future detection.
---
👉 Do you want me to:
- **📡 Add “quarantine mode” logic** (e.g. isolate log lines and mark them unsafe)?
- **🔗 Draft a webhook design** for automatic secret revocation (AWS/GitHub/Slack)?
- **🧠 Write a “meta-prompt” for this agent** so it can _learn new secret formats_ over time from curator feedback?
Here's the **full build-out** with all three requested additions:
## 🛡 **Secrets Sentinel: Extended Prompt**
> **Role & Mission**:
> You are the **Secrets Sentinel**, a security agent tasked with scanning all logs for leaked secrets. You **detect**, **redact**, **quarantine**, and optionally **revoke** compromised credentials. You also **evolve your detection rules** over time by learning from curator and human feedback.
>
> **Extended Responsibilities**:
> ✅ **Quarantine:** When you find a **HIGH severity** secret, isolate the entire log entry from normal processing so it doesn't spread.
> ✅ **Revocation Hooks:** Trigger a webhook or automation script to disable the compromised key (e.g., AWS IAM revoke).
> ✅ **Adaptive Learning:** If you get “false positive” or “missed secret” feedback, update or request updates to your regex rules and detection logic.
---
### 1⃣ **Quarantine Mode Logic**
Add this to the **Python skeleton**:
```python
QUARANTINE_STORE = [] # This could be a Redis list, DB table, etc.
def quarantine_log(log_line: str, reason: str):
"""Move a sensitive log line to quarantine for review."""
quarantine_entry = {
"timestamp": datetime.utcnow().isoformat() + "Z",
"reason": reason,
"log_line": log_line
}
QUARANTINE_STORE.append(quarantine_entry)
print(f"[QUARANTINE] Log quarantined: {reason}")
return quarantine_entry
def scan_log_line(log_line: str, log_id: str, source_agent: str):
alerts = []
for secret_type, pattern in REGEX_PATTERNS.items():
match = re.search(pattern, log_line)
if match:
secret = match.group(0)
severity = "HIGH" if "PRIVATE_KEY" in secret_type or "AWS" in secret_type else "MEDIUM"
alert = {
"timestamp": datetime.utcnow().isoformat() + "Z",
"source_agent": source_agent,
"log_line_id": log_id,
"secret_type": secret_type,
"redacted_sample": redact(secret),
"confidence_score": 0.95,
"severity": severity,
"recommended_action": "Revoke key/rotate credentials"
}
alerts.append(alert)
if severity == "HIGH":
quarantine_log(log_line, f"High severity secret detected: {secret_type}")
return alerts
```
**Now, any HIGH-severity find triggers quarantine.**
---
### 2⃣ **Webhook Design for Secret Revocation**
We'll use a **simple webhook dispatcher** that calls different revocation endpoints depending on the secret type.
```python
import requests
REVOCATION_HOOKS = {
"AWS_ACCESS_KEY": "https://security.example.com/hooks/aws-revoke",
"GITHUB_TOKEN": "https://security.example.com/hooks/github-revoke",
"SLACK_TOKEN": "https://security.example.com/hooks/slack-revoke"
}
def trigger_revocation(secret_type: str, redacted_sample: str):
"""Send webhook request to revoke compromised credentials."""
if secret_type in REVOCATION_HOOKS:
payload = {
"secret_type": secret_type,
"sample": redacted_sample,
"reason": "Detected secret leak"
}
try:
r = requests.post(REVOCATION_HOOKS[secret_type], json=payload, timeout=5)
if r.status_code == 200:
print(f"[REVOCATION] Successfully triggered revocation for {secret_type}")
else:
print(f"[REVOCATION] Failed revocation for {secret_type}: {r.status_code}")
except Exception as e:
print(f"[REVOCATION ERROR] {e}")
```
Then, in the alert loop:
```python
if severity == "HIGH":
quarantine_log(log_line, f"High severity secret detected: {secret_type}")
trigger_revocation(secret_type, redact(secret))
```
**Now, if the sentinel finds an AWS key or GitHub token, it can hit the right webhook to revoke it immediately.**
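
Given the alert settings in `patterns.yaml` (5s timeout, 3 retries, 2s delay), a retry wrapper around the dispatcher might look like this sketch:

```python
import time

def trigger_revocation_with_retry(secret_type: str, redacted_sample: str,
                                  attempts: int = 3, delay: float = 2.0,
                                  timeout: float = 5.0) -> bool:
    """Retry the revocation webhook a few times before giving up."""
    if secret_type not in REVOCATION_HOOKS:
        return False
    payload = {"secret_type": secret_type, "sample": redacted_sample,
               "reason": "Detected secret leak"}
    for attempt in range(1, attempts + 1):
        try:
            r = requests.post(REVOCATION_HOOKS[secret_type], json=payload, timeout=timeout)
            if r.status_code == 200:
                return True
            print(f"[REVOCATION] Attempt {attempt} failed: HTTP {r.status_code}")
        except requests.RequestException as e:
            print(f"[REVOCATION] Attempt {attempt} errored: {e}")
        time.sleep(delay)  # back off before retrying
    return False
```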
---
### 3⃣ **Meta-Prompt for Learning New Secret Formats**
This allows the Sentinel to **expand its detection library** dynamically.
---
**🧠 Adaptive Learning Meta-Prompt**
> **Role**: You are the “Meta-Learner” function of Secrets Sentinel.
>
> **Mission**: Continuously expand the Sentinels knowledge of new secret formats and detection rules.
>
> **Responsibilities**:
>
> * Accept **feedback reports** from curators or human reviewers (false positives, missed secrets).
> * If multiple missed secrets share a pattern, propose a **new regex** (or modify an existing one).
> * Version-control detection rules (e.g., store regex patterns in a JSON or YAML file with version numbers).
> * Generate a **confidence score** for new rules before promoting them to active use.
> * Flag uncertain cases for human approval.
>
> **Behavior**:
>
> * If false positives keep happening for a regex → weaken or add exception clauses.
> * If missed secrets are reported → extract examples, infer format, draft new regex, test on past logs.
>
> **Output**:
>
> ```json
> {
> "action": "add|update|remove",
> "regex_name": "NEW_PROVIDER_KEY",
> "regex_pattern": "XXX",
> "confidence": 0.87,
> "status": "pending human review"
> }
> ```
>
> **Goal**: Turn curator/human feedback into a **self-evolving secret-detection engine**.
---
### 🧩 How It All Fits
**Secrets Sentinel core** = Detect + Redact + Quarantine
**Revocation Hooks** = Automated security response
**Meta-Learning** = Regex patterns evolve over time
---
## 📜 **1⃣ Migration Script: Redis → PostgreSQL**
This script will migrate existing quarantined log entries from **Redis** to **Postgres**.
```python
import redis, json, psycopg2
from datetime import datetime
# Redis config
r = redis.Redis(host='localhost', port=6379, decode_responses=True)
# Postgres config
conn = psycopg2.connect("dbname=sentinel user=postgres password=secret")
cursor = conn.cursor()
def migrate_quarantine():
count = 0
while True:
entry_json = r.rpop("quarantine") # pop oldest entry from Redis
if not entry_json:
break
entry = json.loads(entry_json)
cursor.execute(
"INSERT INTO quarantine (timestamp, reason, log_line) VALUES (%s, %s, %s)",
(entry["timestamp"], entry["reason"], entry["log_line"])
)
count += 1
conn.commit()
print(f"[MIGRATION] Moved {count} quarantined entries from Redis → PostgreSQL")
if __name__ == "__main__":
migrate_quarantine()
```
**Run once** after Postgres is set up — empties Redis queue into the durable DB.
---
## 🖥 **2⃣ Admin Dashboard Spec**
**Purpose:** A web UI to manage the Sentinel's security pipeline.
---
### 🎯 **Core Features**
**Quarantine Browser**
- Paginated view of all quarantined logs
- Search/filter by `secret_type`, `source_agent`, `date`, `status`
- Mark quarantined logs as **reviewed** or **false alarm**
**Regex Rules Manager**
- Lists all regexes from `patterns.yaml`
- Add / update / deactivate rules via UI
- Shows `pending_updates` flagged by the Meta-Learner for human approval
**Revocation Status Board**
- See which secrets triggered revocations
- Status of revocation hooks (success/fail)
**Metrics Dashboard**
- Charts: “Secrets Detected Over Time”, “Top Sources of Leaks”
- KPIs: # HIGH severity secrets this month, # rules updated, # false positives
---
### 🏗 **Tech Stack Suggestion**
- **Backend:** FastAPI (Python)
- **Frontend:** React + Tailwind
- **DB:** PostgreSQL for quarantine + rules history
- **Auth:** OAuth (GitHub/Google) + RBAC (only security admins can approve regex changes)
---
### 🔌 **Endpoints**
```
GET /api/quarantine → list quarantined entries
POST /api/quarantine/review → mark entry as reviewed
GET /api/rules → list regex patterns
POST /api/rules/update → update or add a regex
GET /api/revocations → list revocation events
```
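
A sketch of the first two routes (FastAPI plus psycopg2, against the quarantine schema above; the pagination parameters are illustrative):

```python
import psycopg2
from fastapi import FastAPI

app = FastAPI()
conn = psycopg2.connect("dbname=sentinel user=postgres password=secret")

@app.get("/api/quarantine")
def list_quarantine(limit: int = 50, offset: int = 0):
    """Paginated view of quarantined log entries, newest first."""
    with conn.cursor() as cur:
        cur.execute(
            "SELECT id, timestamp, reason, reviewed FROM quarantine "
            "ORDER BY timestamp DESC LIMIT %s OFFSET %s",
            (limit, offset),
        )
        rows = cur.fetchall()
    return [{"id": r[0], "timestamp": r[1].isoformat(),
             "reason": r[2], "reviewed": r[3]} for r in rows]

@app.post("/api/quarantine/review")
def mark_reviewed(entry_id: int):
    """Mark a quarantined entry as reviewed."""
    with conn.cursor() as cur:
        cur.execute("UPDATE quarantine SET reviewed = TRUE WHERE id = %s", (entry_id,))
    conn.commit()
    return {"id": entry_id, "reviewed": True}
```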
---
### 🖥 **Mock Dashboard Layout**
- **Left Nav:** Quarantine | Rules | Revocations | Metrics
- **Main Panel:**
- Data tables with sorting/filtering
- Inline editors for regex rules
- Approve/Reject buttons for pending regex updates
✅ Basically a **security control room** for Sentinel.
---
## 🤖 **3⃣ Meta-Curator AI Prompt**
This agent reviews Sentinel's work and **tunes it automatically**.
---
### **Meta-Curator: System Prompt**
> **Role & Mission:**
> You are the **Meta-Curator**, a supervisory AI responsible for reviewing the **Secrets Sentinel's** detections, regex updates, and feedback reports.
>
> **Core Responsibilities:**
> ✅ **Audit alerts**: Look for false positives, duplicates, or missed leaks by cross-checking Sentinel outputs.
> ✅ **Review regex proposals**: When Sentinel drafts new regex rules, decide if they're:
>
> - ✅ Approved (safe to activate)
>
> - ❌ Rejected (too broad or incorrect)
>
> - 🕒 Deferred (needs human review)
> ✅ **Tune detection thresholds**: Adjust `confidence` or `severity` on patterns based on outcomes.
> ✅ **Generate new rules**: If multiple missed secrets share a format, draft a regex and submit to humans for approval.
> ✅ **Report upstream**: Summarize changes to security admins weekly.
>
---
### **Behavior Guidelines**
- **Conservative by default:** Don't auto-approve regexes unless confidence > 0.95.
- **Keep auditability:** Every decision (approve/reject) is logged in the hyperlog.
- **Respect human overrides:** Never overwrite a regex that a human explicitly locked.
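
As a sketch, that policy collapses to one decision function (the rejection floor of 0.5 is an assumption; the guidelines above only fix the 0.95 approval bar and the human lock):

```python
def decide_regex_proposal(proposal: dict, human_locked: bool = False) -> str:
    """Apply Meta-Curator policy: approve, reject, or defer a proposed regex."""
    if human_locked:
        return "defer"  # never overwrite a rule a human explicitly locked
    confidence = proposal.get("confidence", 0.0)
    if confidence > 0.95:
        return "approve"
    if confidence < 0.5:
        return "reject"  # hypothetical floor for clearly weak proposals
    return "defer"  # everything in between waits for human review
```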
---
### **Example Meta-Curator Output**
```json
{
"action": "approve_regex",
"regex_name": "GITLAB_TOKEN",
"regex_pattern": "glpat-[0-9A-Za-z\\-_]{20}",
"confidence": 0.97,
"decision_reason": "Validated against 12 quarantined examples, no false positives found.",
"timestamp": "2025-08-02T13:45:00Z"
}
```
✅ This meta-agent is the **brains of the rules layer** — keeps Sentinel evolving, but under control.
---
## 🚀 **Now You Have:**
- **Migration Path** → Redis → PostgreSQL
- **Admin Dashboard Spec** → complete with endpoints & layout
- **Meta-Curator Prompt** → the agent that “manages the manager”
---