Files
CHORUS/chrs-shhh/src/lib.rs

139 lines
5.8 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
use lazy_static::lazy_static;
/// # chrs-shhh
///
/// This crate provides utilities for redacting sensitive information from text.
/// It defines a set of **redaction rules** that match secret patterns (like API keys)
/// and replace them with a placeholder. The crate is deliberately lightweight: it
/// depends only on `regex` and `lazy_static`, and can be embedded in any larger
/// application that needs to scrub logs or user-provided data before storage or
/// transmission.
use regex::Regex;
/// A single rule used to redact a secret.
///
/// * **WHAT** - The name of the rule (e.g. "OpenAI API Key"), the compiled
///   regular-expression pattern that matches the secret, and the replacement
///   string that is inserted in its place.
/// * **HOW** - `pattern` is a `Regex` applied to an input string; every match is
///   substituted with `replacement` via `replace_all`. The replacement may refer
///   to capture groups of the pattern (e.g. `$1`).
/// * **WHY** - Decoupling the rule definition from the redaction logic keeps the
///   sanitizer extensible: new patterns can be added without touching the code
///   that applies them.
#[derive(Debug, Clone)]
pub struct RedactionRule {
    /// Human-readable name for the rule.
    pub name: String,
    /// Compiled regular expression that matches the secret.
    pub pattern: Regex,
    /// Text that replaces each match of `pattern`.
    pub replacement: String,
}

/// The main entry point for secret detection and redaction.
///
/// * **WHAT** - Holds an ordered collection of `RedactionRule`s.
/// * **HOW** - Provides methods to scrub a string (`scrub_text`) and to simply
///   check whether any secret is present (`contains_secrets`).
/// * **WHY** - Centralising the rules in one struct enables reuse and keeps
///   testing straightforward.
#[derive(Debug, Clone)]
pub struct SecretSentinel {
    /// Rules in the order they are applied by `scrub_text`.
    rules: Vec<RedactionRule>,
}
lazy_static! {
/// Matches the literal prefix `sk-` followed by 48 ASCII letters or digits
/// (the OpenAI API key shape recognised by this crate).
/// The pattern is unanchored, so it also matches a key embedded in longer text.
/// `unwrap()` is applied to the result of compiling a fixed pattern literal.
static ref OPENAI_KEY: Regex = Regex::new(r"sk-[a-zA-Z0-9]{48}").unwrap();
/// Matches the literal prefix `AKIA` followed by 16 uppercase ASCII letters or
/// digits (the AWS access key shape recognised by this crate). Unanchored.
static ref AWS_KEY: Regex = Regex::new(r"AKIA[0-9A-Z]{16}").unwrap();
/// Generic `keyword <separator> value` matcher.
///
/// Case-insensitively (`(?i)`) matches one of the keywords `password`, `secret`,
/// `key` or `token`, then optional whitespace, a `:` or `=`, optional whitespace,
/// and finally a run of non-whitespace characters (the value).
/// The keyword is capture group 1 (`$1`) so a replacement string can keep it
/// while dropping the value. The keyword is not word-bounded, so it also matches
/// as the tail of a longer identifier such as `api_key`.
static ref GENERIC_SECRET: Regex = Regex::new(r"(?i)(password|secret|key|token)\s*[:=]\s*[^\s]+").unwrap();
}
impl SecretSentinel {
/// Constructs a `SecretSentinel` prepopulated with a sensible default set of rules.
///
/// * **WHAT** Returns a sentinel containing three rules: OpenAI, AWS and a generic
/// secret matcher.
/// * **HOW** Instantiates `RedactionRule`s using the lazilyinitialised regexes
/// above and stores them in the `rules` vector.
/// * **WHY** Provides a readytouse configuration for typical development
/// environments while still allowing callers to create custom instances.
pub fn new_default() -> Self {
let rules = vec![
RedactionRule {
name: "OpenAI API Key".into(),
pattern: OPENAI_KEY.clone(),
replacement: "[REDACTED OPENAI KEY]".into(),
},
RedactionRule {
name: "AWS Access Key".into(),
pattern: AWS_KEY.clone(),
replacement: "[REDACTED AWS KEY]".into(),
},
RedactionRule {
name: "Generic Secret".into(),
pattern: GENERIC_SECRET.clone(),
// $1 refers to the captured keyword (password, secret, …).
replacement: "$1: [REDACTED]".into(),
},
];
Self { rules }
}
/// Redacts all secrets found in `input` according to the configured rules.
///
/// * **WHAT** Returns a new `String` where each match has been replaced.
/// * **HOW** Iterates over the rules and applies `replace_all` for each.
/// * **WHY** Performing the replacements sequentially ensures that overlapping
/// patterns are handled deterministically.
pub fn scrub_text(&self, input: &str) -> String {
let mut scrubbed = input.to_string();
for rule in &self.rules {
scrubbed = rule
.pattern
.replace_all(&scrubbed, &rule.replacement)
.to_string();
}
scrubbed
}
/// Checks whether any of the configured rules match `input`.
///
/// * **WHAT** Returns `true` if at least one rule's pattern matches.
/// * **HOW** Uses `Iter::any` over `self.rules` with `is_match`.
/// * **WHY** A quick predicate useful for shortcircuiting logging or error
/// handling before performing the full redaction.
pub fn contains_secrets(&self, input: &str) -> bool {
self.rules.iter().any(|rule| rule.pattern.is_match(input))
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Scrubs `text` with a sentinel built from the default rule set.
    fn scrub_with_defaults(text: &str) -> String {
        SecretSentinel::new_default().scrub_text(text)
    }

    #[test]
    fn test_scrub_openai_key() {
        // `sk-` followed by 48 alphanumeric characters, embedded in a sentence.
        let redacted =
            scrub_with_defaults("My key is sk-1234567890abcdef1234567890abcdef1234567890abcdef");
        assert!(redacted.contains("[REDACTED OPENAI KEY]"));
        assert!(!redacted.contains("sk-1234567890"));
    }

    #[test]
    fn test_scrub_generic_password() {
        // The keyword must survive while its value is dropped.
        let redacted = scrub_with_defaults("login with password: my-secret-password now");
        assert!(redacted.contains("password: [REDACTED]"));
    }

    #[test]
    fn test_contains_secrets() {
        let detector = SecretSentinel::new_default();
        let has_secret = |text: &str| detector.contains_secrets(text);
        assert!(has_secret("AKIAIOSFODNN7EXAMPLE"));
        assert!(!has_secret("nothing sensitive here"));
    }
}