Pre-cleanup snapshot - all current files
🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
modules/shhh/pipeline/processor.py | 66 (new file)
@@ -0,0 +1,66 @@
import asyncio
from core.hypercore_reader import HypercoreReader
from core.detector import SecretDetector
from core.llm_analyzer import LLMAnalyzer
from core.quarantine import QuarantineManager
from core.sanitized_writer import SanitizedWriter


class MessageProcessor:
    def __init__(self, reader: HypercoreReader, detector: SecretDetector, llm_analyzer: LLMAnalyzer, quarantine: QuarantineManager, writer: SanitizedWriter, llm_threshold: float):
        self.reader = reader
        self.detector = detector
        self.llm_analyzer = llm_analyzer
        self.quarantine = quarantine
        self.writer = writer
        self.llm_threshold = llm_threshold  # e.g., 0.90

    async def process_stream(self):
        """Main processing loop for the hybrid detection model."""
        async for entry in self.reader.stream_entries():
            # Stage 1: Fast Regex Scan
            regex_matches = self.detector.scan(entry.content)

            if not regex_matches:
                # No secrets found, write original entry to sanitized log
                self.writer.write(entry.content)
                continue

            # A potential secret was found. Default to sanitized, but may be quarantined.
            sanitized_content = entry.content
            should_quarantine = False
            confirmed_secret = None

            for match in regex_matches:
                # High-confidence regex matches trigger immediate quarantine, skipping LLM.
                if match['confidence'] >= self.llm_threshold:
                    should_quarantine = True
                    confirmed_secret = match
                    break  # One high-confidence match is enough

                # Stage 2: Low-confidence matches go to LLM for verification.
                llm_result = self.llm_analyzer.analyze(entry.content)
                if llm_result.get("secret_found"):
                    should_quarantine = True
                    # Prefer LLM's classification but use regex value for redaction
                    confirmed_secret = {
                        "secret_type": llm_result.get("secret_type", match['secret_type']),
                        "value": match['value'],
                        "severity": llm_result.get("severity", match['severity'])
                    }
                    break

            if should_quarantine and confirmed_secret:
                # A secret is confirmed. Redact, quarantine, and alert.
                sanitized_content = self.detector.redact(entry.content, confirmed_secret['value'])

                self.quarantine.quarantine_message(
                    message=entry,
                    secret_type=confirmed_secret['secret_type'],
                    severity=confirmed_secret['severity'],
                    redacted_content=sanitized_content
                )
                # Potentially trigger alerts here as well
                print(f"[ALERT] Confirmed secret {confirmed_secret['secret_type']} found and quarantined.")

            # Write the (potentially redacted) content to the sanitized log
            self.writer.write(sanitized_content)
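For context, a minimal sketch of how this processor might be wired up and run. The constructor arguments for the `core` collaborators (paths, config) are illustrative assumptions and are not part of this commit; only `MessageProcessor` itself and the example 0.90 threshold come from the file above.

```python
import asyncio

from core.hypercore_reader import HypercoreReader
from core.detector import SecretDetector
from core.llm_analyzer import LLMAnalyzer
from core.quarantine import QuarantineManager
from core.sanitized_writer import SanitizedWriter
from modules.shhh.pipeline.processor import MessageProcessor


async def main():
    # NOTE: all constructor arguments below are hypothetical placeholders;
    # the real signatures live in the respective core modules.
    processor = MessageProcessor(
        reader=HypercoreReader("hypercore/feed"),
        detector=SecretDetector(),
        llm_analyzer=LLMAnalyzer(),
        quarantine=QuarantineManager("quarantine/"),
        writer=SanitizedWriter("logs/sanitized.log"),
        llm_threshold=0.90,  # matches the example threshold noted in __init__
    )
    await processor.process_stream()


if __name__ == "__main__":
    asyncio.run(main())
```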