import re
from pathlib import Path

try:
    import yaml
except ImportError:
    # scan() and redact() do not need PyYAML, so keep the module importable
    # without it; _load_patterns() raises when the dependency is required.
    yaml = None


class SecretDetector:
    """Detect and redact secrets in text using configurable regex patterns.

    Patterns are read from a YAML file whose top-level ``patterns`` mapping
    associates each pattern name with its properties: ``regex`` (required)
    and, optionally, ``active``, ``confidence`` and ``severity``.
    """

    # Number of characters shown at each end of a partially redacted secret.
    HINT_CHARS = 4
    # Shortest secret that receives a partial hint. Showing 4 + 4 characters
    # of an 8-character secret would disclose it entirely, so the hint is only
    # emitted when it reveals at most half of the secret.
    MIN_HINT_LENGTH = 16

    def __init__(self, patterns_file: str = "patterns.yaml"):
        """Create a detector and load its patterns.

        Args:
            patterns_file: Path of the YAML file that defines the patterns.
        """
        self.patterns_file = Path(patterns_file)
        self.patterns = self._load_patterns()

    def _load_patterns(self) -> dict:
        """Load detection patterns from the YAML configuration file.

        Returns:
            Mapping of pattern name to its properties. Each active pattern
            whose regex compiles gains a ``compiled_regex`` entry. An empty
            dict is returned when the file cannot be read or parsed.

        Raises:
            ImportError: If PyYAML is not installed.
        """
        if yaml is None:
            raise ImportError("PyYAML is required to load secret patterns")

        # Best effort by design: any failure to read or parse the file is
        # reported and leaves the detector with no patterns.
        try:
            with open(self.patterns_file, "r", encoding="utf-8") as f:
                config = yaml.safe_load(f)
            patterns = config.get("patterns", {})
            entries = list(patterns.items())
        except Exception as e:
            print(f"[ERROR] Failed to load patterns from {self.patterns_file}: {e}")
            return {}

        # Pre-compile each regex once. A malformed entry is skipped on its own
        # so that a single bad pattern does not disable all the others.
        for name, props in entries:
            try:
                if props.get("active", True):
                    props["compiled_regex"] = re.compile(props["regex"])
            except (AttributeError, KeyError, TypeError, re.error) as e:
                print(
                    f"[ERROR] Skipping pattern {name!r} in "
                    f"{self.patterns_file}: {e}"
                )
        return patterns

    def scan(self, text: str) -> list[dict]:
        """Scan text and return the secrets found, with metadata.

        Every occurrence matched by an active pattern is considered, and each
        distinct value is reported once per pattern. Zero-length matches are
        ignored.

        Args:
            text: The text to search.

        Returns:
            A list of dicts with the keys ``secret_type``, ``value``,
            ``confidence`` (default 0.8) and ``severity`` (default "MEDIUM").
        """
        matches = []
        for pattern_name, pattern in self.patterns.items():
            if not pattern.get("active", True):
                continue
            compiled = pattern.get("compiled_regex")
            if compiled is None:
                continue

            seen = set()
            for regex_match in compiled.finditer(text):
                value = regex_match.group(0)
                # An empty match is not a secret; repeats add no information
                # because redact() replaces every occurrence of a value.
                if not value or value in seen:
                    continue
                seen.add(value)
                matches.append({
                    "secret_type": pattern_name,
                    "value": value,
                    "confidence": pattern.get("confidence", 0.8),
                    "severity": pattern.get("severity", "MEDIUM"),
                })
        return matches

    def redact(self, text: str, secret_value: str) -> str:
        """Replace every occurrence of a secret value in text.

        Secrets shorter than ``MIN_HINT_LENGTH`` become ``[REDACTED]``.
        Longer ones become ``[REDACTED:<first 4>****<last 4>]`` so they can
        still be told apart.

        Args:
            text: The text containing the secret.
            secret_value: The exact secret string to replace.

        Returns:
            The text with the secret replaced; unchanged when
            ``secret_value`` is empty.
        """
        # str.replace("", x) would insert x between every character.
        if not secret_value:
            return text

        if len(secret_value) < self.MIN_HINT_LENGTH:
            return text.replace(secret_value, "[REDACTED]")

        hint = (
            secret_value[:self.HINT_CHARS]
            + "****"
            + secret_value[-self.HINT_CHARS:]
        )
        return text.replace(secret_value, f"[REDACTED:{hint}]")