Docs: Comprehensive inline rustdoc and architectural summary PDF
This commit is contained in:
@@ -1,23 +1,63 @@
|
||||
use regex::Regex;
|
||||
use lazy_static::lazy_static;
|
||||
/// # chrs-shhh
|
||||
///
|
||||
/// This crate provides utilities for redacting sensitive information from text.
|
||||
/// It defines a set of **redaction rules** that match secret patterns (like API keys)
|
||||
/// and replace them with a placeholder. The crate is deliberately lightweight – it
|
||||
/// only depends on `regex` and `lazy_static` – and can be embedded in any larger
|
||||
/// application that needs to scrub logs or user‑provided data before storage or
|
||||
/// transmission.
|
||||
use regex::Regex;
|
||||
|
||||
/// Represents a single rule used to redact a secret.
|
||||
///
|
||||
/// * **WHAT** – The name of the rule (e.g. "OpenAI API Key"), the compiled
|
||||
/// regular‑expression pattern that matches the secret, and the replacement string
|
||||
/// that will be inserted.
|
||||
/// * **HOW** – The `pattern` is a `Regex` that is applied to an input string. When a
|
||||
/// match is found the `replacement` is inserted using `replace_all`.
|
||||
/// * **WHY** – Decoupling the rule definition from the redaction logic makes the
|
||||
/// sanitizer extensible; new patterns can be added without changing the core
|
||||
/// implementation.
|
||||
pub struct RedactionRule {
|
||||
/// Human‑readable name for the rule.
|
||||
pub name: String,
|
||||
/// Compiled regular expression that matches the secret.
|
||||
pub pattern: Regex,
|
||||
/// Text that will replace the matched secret.
|
||||
pub replacement: String,
|
||||
}
|
||||
|
||||
/// The main entry point for secret detection and redaction.
|
||||
///
|
||||
/// * **WHAT** – Holds a collection of `RedactionRule`s.
|
||||
/// * **HOW** – Provides methods to scrub a string (`scrub_text`) and to simply
|
||||
/// check whether any secret is present (`contains_secrets`).
|
||||
/// * **WHY** – Centralising the rules in a struct enables reuse and makes testing
|
||||
/// straightforward.
|
||||
pub struct SecretSentinel {
|
||||
rules: Vec<RedactionRule>,
|
||||
}
|
||||
|
||||
lazy_static! {
    /// Matches OpenAI-style API keys: the literal prefix `sk-` followed by 48
    /// ASCII letters or digits. The pattern is not anchored, so a longer
    /// alphanumeric run matches on its first 48 characters.
    static ref OPENAI_KEY: Regex = Regex::new(r"sk-[a-zA-Z0-9]{48}")
        .expect("OPENAI_KEY pattern is a constant, valid regex");

    /// Matches AWS access key ids: the literal prefix `AKIA` followed by 16
    /// uppercase ASCII letters or digits.
    static ref AWS_KEY: Regex = Regex::new(r"AKIA[0-9A-Z]{16}")
        .expect("AWS_KEY pattern is a constant, valid regex");

    /// Generic `keyword = value` / `keyword: value` matcher (case-insensitive).
    ///
    /// The keyword (`password`, `secret`, `key` or `token`) is capture group 1, so
    /// a replacement string can keep it via `$1`. It is followed by optional
    /// whitespace, `:` or `=`, optional whitespace, and a run of non-whitespace
    /// characters that forms the value. Keywords are not anchored to word
    /// boundaries, so one that ends a longer identifier also matches.
    static ref GENERIC_SECRET: Regex =
        Regex::new(r"(?i)(password|secret|key|token)\s*[:=]\s*[^\s]+")
            .expect("GENERIC_SECRET pattern is a constant, valid regex");
}
|
||||
|
||||
impl SecretSentinel {
|
||||
/// Constructs a `SecretSentinel` pre‑populated with a sensible default set of rules.
|
||||
///
|
||||
/// * **WHAT** – Returns a sentinel containing three rules: OpenAI, AWS and a generic
|
||||
/// secret matcher.
|
||||
/// * **HOW** – Instantiates `RedactionRule`s using the lazily‑initialised regexes
|
||||
/// above and stores them in the `rules` vector.
|
||||
/// * **WHY** – Provides a ready‑to‑use configuration for typical development
|
||||
/// environments while still allowing callers to create custom instances.
|
||||
pub fn new_default() -> Self {
|
||||
let rules = vec![
|
||||
RedactionRule {
|
||||
@@ -33,20 +73,36 @@ impl SecretSentinel {
|
||||
RedactionRule {
|
||||
name: "Generic Secret".into(),
|
||||
pattern: GENERIC_SECRET.clone(),
|
||||
// $1 refers to the captured keyword (password, secret, …).
|
||||
replacement: "$1: [REDACTED]".into(),
|
||||
},
|
||||
];
|
||||
Self { rules }
|
||||
}
|
||||
|
||||
/// Redacts all secrets found in `input` according to the configured rules.
|
||||
///
|
||||
/// * **WHAT** – Returns a new `String` where each match has been replaced.
|
||||
/// * **HOW** – Iterates over the rules and applies `replace_all` for each.
|
||||
/// * **WHY** – Performing the replacements sequentially ensures that overlapping
|
||||
/// patterns are handled deterministically.
|
||||
pub fn scrub_text(&self, input: &str) -> String {
|
||||
let mut scrubbed = input.to_string();
|
||||
for rule in &self.rules {
|
||||
scrubbed = rule.pattern.replace_all(&scrubbed, &rule.replacement).to_string();
|
||||
scrubbed = rule
|
||||
.pattern
|
||||
.replace_all(&scrubbed, &rule.replacement)
|
||||
.to_string();
|
||||
}
|
||||
scrubbed
|
||||
}
|
||||
|
||||
/// Checks whether any of the configured rules match `input`.
|
||||
///
|
||||
/// * **WHAT** – Returns `true` if at least one rule's pattern matches.
|
||||
/// * **HOW** – Uses `Iter::any` over `self.rules` with `is_match`.
|
||||
/// * **WHY** – A quick predicate useful for short‑circuiting logging or error
|
||||
/// handling before performing the full redaction.
|
||||
pub fn contains_secrets(&self, input: &str) -> bool {
|
||||
self.rules.iter().any(|rule| rule.pattern.is_match(input))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user