Pre-cleanup snapshot - all current files
🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
modules/shhh/pipeline/processor.py | 66 (new file)
@@ -0,0 +1,66 @@
import asyncio
from core.hypercore_reader import HypercoreReader
from core.detector import SecretDetector
from core.llm_analyzer import LLMAnalyzer
from core.quarantine import QuarantineManager
from core.sanitized_writer import SanitizedWriter


class MessageProcessor:
    def __init__(self, reader: HypercoreReader, detector: SecretDetector, llm_analyzer: LLMAnalyzer, quarantine: QuarantineManager, writer: SanitizedWriter, llm_threshold: float):
        self.reader = reader
        self.detector = detector
        self.llm_analyzer = llm_analyzer
        self.quarantine = quarantine
        self.writer = writer
        self.llm_threshold = llm_threshold  # e.g., 0.90

    async def process_stream(self):
        """Main processing loop for the hybrid detection model."""
        async for entry in self.reader.stream_entries():
            # Stage 1: Fast Regex Scan
            regex_matches = self.detector.scan(entry.content)

            if not regex_matches:
                # No secrets found, write original entry to sanitized log
                self.writer.write(entry.content)
                continue

            # A potential secret was found. Default to sanitized, but may be quarantined.
            sanitized_content = entry.content
            should_quarantine = False
            confirmed_secret = None

            for match in regex_matches:
                # High-confidence regex matches trigger immediate quarantine, skipping LLM.
                if match['confidence'] >= self.llm_threshold:
                    should_quarantine = True
                    confirmed_secret = match
                    break  # One high-confidence match is enough

                # Stage 2: Low-confidence matches go to LLM for verification.
                llm_result = self.llm_analyzer.analyze(entry.content)
                if llm_result.get("secret_found"):
                    should_quarantine = True
                    # Prefer LLM's classification but use regex value for redaction
                    confirmed_secret = {
                        "secret_type": llm_result.get("secret_type", match['secret_type']),
                        "value": match['value'],
                        "severity": llm_result.get("severity", match['severity'])
                    }
                    break

            if should_quarantine and confirmed_secret:
                # A secret is confirmed. Redact, quarantine, and alert.
                sanitized_content = self.detector.redact(entry.content, confirmed_secret['value'])

                self.quarantine.quarantine_message(
                    message=entry,
                    secret_type=confirmed_secret['secret_type'],
                    severity=confirmed_secret['severity'],
                    redacted_content=sanitized_content
                )
                # Potentially trigger alerts here as well
                print(f"[ALERT] Confirmed secret {confirmed_secret['secret_type']} found and quarantined.")

            # Write the (potentially redacted) content to the sanitized log
            self.writer.write(sanitized_content)
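For context, a minimal sketch of how this processor might be wired up and run. The constructor arguments for the `core` collaborators (paths, config) are illustrative assumptions and are not part of this commit; only `MessageProcessor` itself and the example 0.90 threshold come from the file above.

```python
import asyncio

from core.hypercore_reader import HypercoreReader
from core.detector import SecretDetector
from core.llm_analyzer import LLMAnalyzer
from core.quarantine import QuarantineManager
from core.sanitized_writer import SanitizedWriter
from modules.shhh.pipeline.processor import MessageProcessor


async def main():
    # NOTE: all constructor arguments below are hypothetical placeholders;
    # the real signatures live in the respective core modules.
    processor = MessageProcessor(
        reader=HypercoreReader("hypercore/feed"),
        detector=SecretDetector(),
        llm_analyzer=LLMAnalyzer(),
        quarantine=QuarantineManager("quarantine/"),
        writer=SanitizedWriter("logs/sanitized.log"),
        llm_threshold=0.90,  # matches the example threshold noted in __init__
    )
    await processor.process_stream()


if __name__ == "__main__":
    asyncio.run(main())
```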