Files
chorus-services/modules/shhh/core/detector.py
tony 4511f4c801 Pre-cleanup snapshot - all current files
🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-05 02:32:45 +10:00

53 lines
2.1 KiB
Python

import re
import yaml
from pathlib import Path
class SecretDetector:
    """
    A simplified secret detection engine using configurable regex patterns.

    Patterns are read from a YAML file whose top-level ``patterns`` mapping
    associates a pattern name with a mapping of properties. The properties
    read by this class are ``regex`` (required for active patterns),
    ``active`` (default ``True``), ``confidence`` (default ``0.8``) and
    ``severity`` (default ``"MEDIUM"``).

    The detector scans text for secrets, redacts them, and provides metadata.
    """

    # Number of characters left visible at each end of a partially
    # redacted secret.
    _REVEAL_CHARS = 4
    # Secrets shorter than this are masked completely. Partial redaction
    # exposes 2 * _REVEAL_CHARS characters, so below this length it would
    # reveal more than half of the secret (all of it at length 8).
    _MIN_PARTIAL_REVEAL_LEN = 16

    def __init__(self, patterns_file: str = "patterns.yaml"):
        """Remember the patterns file location and load its patterns."""
        self.patterns_file = Path(patterns_file)
        self.patterns = self._load_patterns()

    def _load_patterns(self) -> dict:
        """Load detection patterns from YAML configuration.

        Loading is best-effort: if the file cannot be read or parsed, or
        does not contain a ``patterns`` mapping, an error is printed and an
        empty dict is returned. An individual pattern that is malformed
        (not a mapping, missing ``regex``, or an invalid regex) is reported
        and left without a ``compiled_regex`` so that it is never scanned,
        while the remaining patterns stay usable.

        Returns:
            Mapping of pattern name to its properties. Active, valid
            patterns additionally carry a ``compiled_regex`` entry.
        """
        try:
            with open(self.patterns_file, 'r', encoding="utf-8") as f:
                config = yaml.safe_load(f)
        except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
            print(f"[ERROR] Failed to load patterns from {self.patterns_file}: {e}")
            return {}

        # safe_load yields None for an empty document and may yield a
        # scalar or list for unexpected content.
        if not isinstance(config, dict):
            print(
                f"[ERROR] Failed to load patterns from {self.patterns_file}: "
                "top-level YAML value is not a mapping"
            )
            return {}

        raw_patterns = config.get('patterns', {})
        if not isinstance(raw_patterns, dict):
            print(
                f"[ERROR] Failed to load patterns from {self.patterns_file}: "
                "'patterns' is not a mapping"
            )
            return {}

        patterns = {}
        for name, props in raw_patterns.items():
            if not isinstance(props, dict):
                print(f"[ERROR] Skipping pattern {name!r}: expected a mapping")
                continue
            patterns[name] = props
            if not props.get('active', True):
                continue
            # Pre-compile regex for efficiency. A bad entry must not take
            # every other pattern down with it.
            try:
                props['compiled_regex'] = re.compile(props['regex'])
            except (KeyError, TypeError, re.error) as e:
                print(f"[ERROR] Skipping pattern {name!r}: {e!r}")
        return patterns

    def scan(self, text: str) -> list[dict]:
        """Scan text and return a list of found secrets with metadata.

        Every active, compiled pattern is applied to the whole text. Each
        distinct matched value is reported once per pattern, in order of
        first appearance; zero-length matches are ignored.

        Returns:
            A list of dicts with the keys ``secret_type``, ``value``,
            ``confidence`` and ``severity``.
        """
        matches = []
        for pattern_name, pattern in self.patterns.items():
            if not pattern.get('active', True) or 'compiled_regex' not in pattern:
                continue
            seen_values = set()
            for regex_match in pattern['compiled_regex'].finditer(text):
                value = regex_match.group(0)
                # An empty match is not a secret, and a repeated value is
                # already covered by the first report (redact() replaces
                # every occurrence of a value).
                if not value or value in seen_values:
                    continue
                seen_values.add(value)
                matches.append({
                    "secret_type": pattern_name,
                    "value": value,
                    "confidence": pattern.get("confidence", 0.8),
                    "severity": pattern.get("severity", "MEDIUM"),
                })
        return matches

    def redact(self, text: str, secret_value: str) -> str:
        """Redact every occurrence of a specific secret value in a string.

        Short secrets are replaced by ``[REDACTED]``. Longer secrets are
        replaced by ``[REDACTED:<first 4>****<last 4>]`` so that they stay
        recognisable without being exposed.

        Returns:
            The text with the secret replaced; the text unchanged when
            ``secret_value`` is empty.
        """
        # str.replace("", x) would insert x between every character.
        if not secret_value:
            return text
        # Ensure we don't reveal too much for short secrets.
        if len(secret_value) < self._MIN_PARTIAL_REVEAL_LEN:
            return text.replace(secret_value, "[REDACTED]")
        visible = self._REVEAL_CHARS
        redacted_str = secret_value[:visible] + "****" + secret_value[-visible:]
        return text.replace(secret_value, f"[REDACTED:{redacted_str}]")