use lazy_static::lazy_static; /// # chrs-shhh /// /// This crate provides utilities for redacting sensitive information from text. /// It defines a set of **redaction rules** that match secret patterns (like API keys) /// and replace them with a placeholder. The crate is deliberately lightweight – it /// only depends on `regex` and `lazy_static` – and can be embedded in any larger /// application that needs to scrub logs or user‑provided data before storage or /// transmission. use regex::Regex; /// Represents a single rule used to redact a secret. /// /// * **WHAT** – The name of the rule (e.g. "OpenAI API Key"), the compiled /// regular‑expression pattern that matches the secret, and the replacement string /// that will be inserted. /// * **HOW** – The `pattern` is a `Regex` that is applied to an input string. When a /// match is found the `replacement` is inserted using `replace_all`. /// * **WHY** – Decoupling the rule definition from the redaction logic makes the /// sanitizer extensible; new patterns can be added without changing the core /// implementation. pub struct RedactionRule { /// Human‑readable name for the rule. pub name: String, /// Compiled regular expression that matches the secret. pub pattern: Regex, /// Text that will replace the matched secret. pub replacement: String, } /// The main entry point for secret detection and redaction. /// /// * **WHAT** – Holds a collection of `RedactionRule`s. /// * **HOW** – Provides methods to scrub a string (`scrub_text`) and to simply /// check whether any secret is present (`contains_secrets`). /// * **WHY** – Centralising the rules in a struct enables reuse and makes testing /// straightforward. pub struct SecretSentinel { rules: Vec, } lazy_static! { /// Matches OpenAI API keys of the form `sk-<48 alphanumeric chars>`. static ref OPENAI_KEY: Regex = Regex::new(r"sk-[a-zA-Z0-9]{48}").unwrap(); /// Matches AWS access keys that start with `AKIA` followed by 16 uppercase letters or digits. static ref AWS_KEY: Regex = Regex::new(r"AKIA[0-9A-Z]{16}").unwrap(); /// Generic secret pattern that captures common keywords like password, secret, key or token. /// The capture group (`$1`) is retained so that the surrounding identifier is preserved. static ref GENERIC_SECRET: Regex = Regex::new(r"(?i)(password|secret|key|token)\s*[:=]\s*[^\s]+").unwrap(); } impl SecretSentinel { /// Constructs a `SecretSentinel` pre‑populated with a sensible default set of rules. /// /// * **WHAT** – Returns a sentinel containing three rules: OpenAI, AWS and a generic /// secret matcher. /// * **HOW** – Instantiates `RedactionRule`s using the lazily‑initialised regexes /// above and stores them in the `rules` vector. /// * **WHY** – Provides a ready‑to‑use configuration for typical development /// environments while still allowing callers to create custom instances. pub fn new_default() -> Self { let rules = vec![ RedactionRule { name: "OpenAI API Key".into(), pattern: OPENAI_KEY.clone(), replacement: "[REDACTED OPENAI KEY]".into(), }, RedactionRule { name: "AWS Access Key".into(), pattern: AWS_KEY.clone(), replacement: "[REDACTED AWS KEY]".into(), }, RedactionRule { name: "Generic Secret".into(), pattern: GENERIC_SECRET.clone(), // $1 refers to the captured keyword (password, secret, …). replacement: "$1: [REDACTED]".into(), }, ]; Self { rules } } /// Redacts all secrets found in `input` according to the configured rules. /// /// * **WHAT** – Returns a new `String` where each match has been replaced. /// * **HOW** – Iterates over the rules and applies `replace_all` for each. /// * **WHY** – Performing the replacements sequentially ensures that overlapping /// patterns are handled deterministically. pub fn scrub_text(&self, input: &str) -> String { let mut scrubbed = input.to_string(); for rule in &self.rules { scrubbed = rule .pattern .replace_all(&scrubbed, &rule.replacement) .to_string(); } scrubbed } /// Checks whether any of the configured rules match `input`. /// /// * **WHAT** – Returns `true` if at least one rule's pattern matches. /// * **HOW** – Uses `Iter::any` over `self.rules` with `is_match`. /// * **WHY** – A quick predicate useful for short‑circuiting logging or error /// handling before performing the full redaction. pub fn contains_secrets(&self, input: &str) -> bool { self.rules.iter().any(|rule| rule.pattern.is_match(input)) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_scrub_openai_key() { let sentinel = SecretSentinel::new_default(); let input = "My key is sk-1234567890abcdef1234567890abcdef1234567890abcdef"; let output = sentinel.scrub_text(input); assert!(output.contains("[REDACTED OPENAI KEY]")); assert!(!output.contains("sk-1234567890")); } #[test] fn test_scrub_generic_password() { let sentinel = SecretSentinel::new_default(); let input = "login with password: my-secret-password now"; let output = sentinel.scrub_text(input); assert!(output.contains("password: [REDACTED]")); } #[test] fn test_contains_secrets() { let sentinel = SecretSentinel::new_default(); assert!(sentinel.contains_secrets("AKIAIOSFODNN7EXAMPLE")); assert!(!sentinel.contains_secrets("nothing sensitive here")); } }