import asyncio

from core.hypercore_reader import HypercoreReader
from core.detector import SecretDetector
from core.llm_analyzer import LLMAnalyzer
from core.quarantine import QuarantineManager
from core.sanitized_writer import SanitizedWriter


class MessageProcessor:
    """Scan stream entries for secrets and write a sanitized copy of each one.

    Detection is hybrid: a regex scan runs on every entry, and the LLM
    analyzer is consulted only when a regex match falls below
    ``llm_threshold``. Entries with a confirmed secret are redacted,
    handed to the quarantine manager, and the redacted text is written to
    the sanitized log; all other entries are written unchanged.
    """

    def __init__(self, reader: HypercoreReader, detector: SecretDetector,
                 llm_analyzer: LLMAnalyzer, quarantine: QuarantineManager,
                 writer: SanitizedWriter, llm_threshold: float):
        """Store the collaborators used by :meth:`process_stream`.

        Args:
            reader: Source of entries; must provide ``stream_entries()``
                usable with ``async for``.
            detector: Provides ``scan(content)`` and
                ``redact(content, value)``.
            llm_analyzer: Provides ``analyze(content)``; the result is
                read with ``.get("secret_found")``, ``.get("secret_type")``
                and ``.get("severity")``.
            quarantine: Provides ``quarantine_message(...)``.
            writer: Provides ``write(content)`` for the sanitized log.
            llm_threshold: Regex confidence at or above which a match is
                accepted without asking the LLM.
        """
        self.reader = reader
        self.detector = detector
        self.llm_analyzer = llm_analyzer
        self.quarantine = quarantine
        self.writer = writer
        self.llm_threshold = llm_threshold  # e.g., 0.90

    async def process_stream(self):
        """Main processing loop for the hybrid detection model.

        For every entry yielded by the reader:

        1. Run the regex scan. With no matches, write the original
           content to the sanitized log.
        2. Otherwise try to confirm a secret (see ``_confirm_secret``).
           If nothing is confirmed, write the original content.
        3. If a secret is confirmed, redact it, quarantine the entry,
           print an alert, and write the redacted content.
        """
        async for entry in self.reader.stream_entries():
            content = entry.content

            # Stage 1: fast regex scan.
            regex_matches = self.detector.scan(content)
            if not regex_matches:
                # No candidates at all: pass the entry through untouched.
                self.writer.write(content)
                continue

            confirmed_secret = self._confirm_secret(content, regex_matches)
            if confirmed_secret is None:
                # Candidates existed but none was confirmed.
                self.writer.write(content)
                continue

            # A secret is confirmed. Redact, quarantine, and alert.
            sanitized_content = self._redact_confirmed(
                content, confirmed_secret, regex_matches
            )
            self.quarantine.quarantine_message(
                message=entry,
                secret_type=confirmed_secret['secret_type'],
                severity=confirmed_secret['severity'],
                redacted_content=sanitized_content,
            )
            # Potentially trigger alerts here as well.
            print(f"[ALERT] Confirmed secret {confirmed_secret['secret_type']} found and quarantined.")

            # Only the redacted text ever reaches the sanitized log.
            self.writer.write(sanitized_content)

    def _confirm_secret(self, content, regex_matches):
        """Return the first confirmed secret among ``regex_matches``, or None.

        Matches are examined in order. A match whose ``confidence`` is at
        or above ``llm_threshold`` is returned as-is. For a lower
        confidence match the LLM verdict on ``content`` is consulted; a
        positive verdict yields a dict with the LLM's ``secret_type`` and
        ``severity`` (falling back to the regex match's) and the regex
        match's ``value``.

        The LLM input is the same for every match of an entry, so the
        analyzer is called lazily and at most once per entry instead of
        once per low-confidence match.
        """
        llm_result = None  # Not fetched until a low-confidence match needs it.
        for match in regex_matches:
            # High-confidence regex matches are accepted without the LLM.
            if match['confidence'] >= self.llm_threshold:
                return match

            # Stage 2: low-confidence matches rely on the LLM verdict.
            if llm_result is None:
                # A missing/None result is treated as "no secret found".
                llm_result = self.llm_analyzer.analyze(content) or {}
            if llm_result.get("secret_found"):
                # Prefer LLM's classification but use regex value for redaction.
                return {
                    "secret_type": llm_result.get("secret_type", match['secret_type']),
                    "value": match['value'],
                    "severity": llm_result.get("severity", match['severity']),
                }
        return None

    def _redact_confirmed(self, content, confirmed_secret, regex_matches):
        """Return ``content`` with every confirmed secret value redacted.

        Redacts the value of ``confirmed_secret`` and the value of every
        other regex match at or above ``llm_threshold``. Redacting only
        the first confirmed value would leave any further high-confidence
        secrets of the same entry in clear text in the sanitized log.
        """
        values = [confirmed_secret['value']]
        values.extend(
            match['value']
            for match in regex_matches
            if match['confidence'] >= self.llm_threshold
        )

        sanitized = content
        already_redacted = []  # list, not set: values need not be hashable
        for value in values:
            if value in already_redacted:
                continue
            already_redacted.append(value)
            sanitized = self.detector.redact(sanitized, value)
        return sanitized