Docs: Comprehensive inline rustdoc and architectural summary PDF

This commit is contained in:
anthonyrawlins
2026-03-03 18:05:53 +11:00
parent cc03616918
commit 0f28e4b669
2932 changed files with 14552 additions and 74 deletions

View File

@@ -1,23 +1,63 @@
use regex::Regex;
use lazy_static::lazy_static;
/// # chrs-shhh
///
/// This crate provides utilities for redacting sensitive information from text.
/// It defines a set of **redaction rules** that match secret patterns (like API keys)
/// and replace them with a placeholder. The crate is deliberately lightweight: it
/// only depends on `regex` and `lazy_static`, and can be embedded in any larger
/// application that needs to scrub logs or user-provided data before storage or
/// transmission.
use regex::Regex;
/// Represents a single rule used to redact a secret.
///
/// * **WHAT** – The name of the rule (e.g. "OpenAI API Key"), the compiled
///   regular-expression pattern that matches the secret, and the replacement
///   string that will be inserted.
/// * **HOW** – The `pattern` is a `Regex` that is applied to an input string.
///   When a match is found the `replacement` is inserted using `replace_all`,
///   so it may reference capture groups (e.g. `$1`).
/// * **WHY** – Decoupling the rule definition from the redaction logic makes
///   the sanitizer extensible; new patterns can be added without changing the
///   core implementation.
///
/// `Debug` and `Clone` are derived so rules can be logged and copied; a rule
/// holds only its own name, pattern and replacement text.
#[derive(Debug, Clone)]
pub struct RedactionRule {
    /// Human-readable name for the rule.
    pub name: String,
    /// Compiled regular expression that matches the secret.
    pub pattern: Regex,
    /// Text that will replace the matched secret.
    pub replacement: String,
}

/// The main entry point for secret detection and redaction.
///
/// * **WHAT** – Holds a collection of `RedactionRule`s.
/// * **HOW** – Provides methods to scrub a string (`scrub_text`) and to simply
///   check whether any secret is present (`contains_secrets`).
/// * **WHY** – Centralising the rules in a struct enables reuse and makes
///   testing straightforward.
#[derive(Debug, Clone)]
pub struct SecretSentinel {
    /// Rules in the order `scrub_text` applies them.
    rules: Vec<RedactionRule>,
}
lazy_static! {
    /// Matches OpenAI API keys of the form `sk-<48 alphanumeric chars>`.
    static ref OPENAI_KEY: Regex = Regex::new(r"sk-[a-zA-Z0-9]{48}")
        .expect("OPENAI_KEY is a hard-coded, valid regex");
    /// Matches AWS access keys that start with `AKIA` followed by 16 uppercase letters or digits.
    static ref AWS_KEY: Regex = Regex::new(r"AKIA[0-9A-Z]{16}")
        .expect("AWS_KEY is a hard-coded, valid regex");
    /// Generic secret pattern: a keyword (password, secret, key or token, case-insensitive),
    /// optional whitespace, a `:` or `=` separator, optional whitespace, then a run of
    /// non-whitespace characters.
    ///
    /// The keyword is the only capture group (`$1`), so a replacement can keep it while
    /// dropping the value. The keyword is not anchored to a word boundary, so longer
    /// identifiers that end in a keyword (e.g. `api_key=...`) match as well.
    static ref GENERIC_SECRET: Regex =
        Regex::new(r"(?i)(password|secret|key|token)\s*[:=]\s*[^\s]+")
            .expect("GENERIC_SECRET is a hard-coded, valid regex");
}
impl SecretSentinel {
/// Constructs a `SecretSentinel` pre-populated with a sensible default set of rules.
///
/// * **WHAT** – Returns a sentinel containing three rules: OpenAI, AWS and a generic
///   secret matcher.
/// * **HOW** – Instantiates `RedactionRule`s using the lazily-initialised regexes
///   above and stores them in the `rules` vector.
/// * **WHY** – Provides a ready-to-use configuration for typical development
///   environments while still allowing callers to create custom instances.
pub fn new_default() -> Self {
let rules = vec![
RedactionRule {
@@ -33,20 +73,36 @@ impl SecretSentinel {
RedactionRule {
name: "Generic Secret".into(),
pattern: GENERIC_SECRET.clone(),
// $1 refers to the captured keyword (password, secret, …).
replacement: "$1: [REDACTED]".into(),
},
];
Self { rules }
}
/// Redacts all secrets found in `input` according to the configured rules.
///
/// * **WHAT** – Returns a new `String` in which every match of every rule's
///   pattern has been replaced by that rule's replacement text.
/// * **HOW** – Applies `replace_all` once per rule, in the order the rules are
///   stored, feeding the output of one rule into the next.
/// * **WHY** – Performing the replacements sequentially in a fixed order keeps
///   the result deterministic when patterns overlap.
///
/// Replacement text goes through `replace_all`'s expansion, so `$1` in a
/// replacement refers to the rule's first capture group.
pub fn scrub_text(&self, input: &str) -> String {
    let mut scrubbed = input.to_owned();
    for rule in &self.rules {
        // Exactly one pass per rule. `into_owned` reuses the buffer that
        // `replace_all` already allocated when something was replaced,
        // instead of copying it a second time via `to_string`.
        scrubbed = rule
            .pattern
            .replace_all(&scrubbed, rule.replacement.as_str())
            .into_owned();
    }
    scrubbed
}
/// Reports whether `input` contains anything the configured rules would match.
///
/// * **WHAT** – `true` if at least one rule's pattern matches `input`,
///   `false` otherwise (including when no rules are configured).
/// * **HOW** – Walks the rules' patterns and tests each with `is_match`,
///   stopping at the first hit.
/// * **WHY** – A quick predicate useful for short-circuiting logging or error
///   handling before performing the full redaction.
pub fn contains_secrets(&self, input: &str) -> bool {
    self.rules
        .iter()
        .map(|rule| &rule.pattern)
        .any(|pattern| pattern.is_match(input))
}