Docs: Comprehensive inline rustdoc and architectural summary PDF

This commit is contained in:
anthonyrawlins
2026-03-03 18:05:53 +11:00
parent cc03616918
commit 0f28e4b669
2932 changed files with 14552 additions and 74 deletions

View File

@@ -1,18 +1,63 @@
//! # chrs-bubble
//!
//! A provenance-tracking crate that records nodes and edges in a directed acyclic
//! graph (DAG) and persists them using a Dolt-backed graph implementation.
//! The crate is deliberately small - it only pulls in `petgraph` for the in-memory
//! DAG, `serde` for serialization, `uuid` for unique identifiers and `thiserror`
//! for ergonomic error handling. It is used by higher-level components that need
//! to capture the provenance of generated artifacts (e.g. files, messages, or
//! results) and later query that history.
//!
//! The public API is organised around three concepts:
//! * **ProvenanceEdge** - The type of relationship between two nodes.
//! * **BubbleError** - Errors that can occur when interacting with the underlying
//!   Dolt graph or when a node cannot be found.
//! * **ProvenanceGraph** - The façade that holds an in-memory DAG and a
//!   `DoltGraph` persistence layer, exposing methods to record nodes and links.
//!
//! Each item is documented with a *WHAT*, *HOW* and *WHY* section so that users can
//! quickly understand its purpose, its implementation details and the design
//! rationale.
use chrs_graph::{DoltGraph, GraphError};
use ucxl::UCXLAddress;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use uuid::Uuid;
use petgraph::graph::{DiGraph, NodeIndex};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use thiserror::Error;
use ucxl::UCXLAddress;
use uuid::Uuid;
/// The semantic label attached to a directed link between two provenance nodes.
///
/// * **WHAT** - A closed set of relationship kinds:
///   - `DerivedFrom` - the target was derived from the source.
///   - `Cites` - the target cites the source.
///   - `InfluencedBy` - the target was influenced by the source without being
///     directly derived from it.
/// * **HOW** - Used as the edge payload of the `petgraph::DiGraph` that backs
///   the provenance graph. The type is `Copy`, comparable for equality, and
///   derives the `serde` traits so it can be serialised.
/// * **WHY** - A dedicated enum keeps provenance queries expressive and
///   type-safe, while the persisted form stays simple (the variant name as a
///   string).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ProvenanceEdge {
    /// The target node was *derived* from the source node.
    DerivedFrom,
    /// The target node *cites* the source node.
    Cites,
    /// The target node was *influenced* by the source node.
    InfluencedBy,
}
/// Errors that can arise when working with a `ProvenanceGraph`.
///
/// * **WHAT** - Enumerates possible failure modes:
///   - Graph-level errors (`GraphError`).
///   - Serde JSON errors (`serde_json::Error`).
///   - A lookup failure when a node identifier cannot be resolved.
/// * **HOW** - Implements `std::error::Error` via the `thiserror::Error` derive
///   macro, forwarding underlying error sources with `#[from]`.
/// * **WHY** - A single error type simplifies error propagation for callers and
///   retains the original context for debugging.
#[derive(Debug, Error)]
pub enum BubbleError {
#[error("Graph error: {0}")]
@@ -23,6 +68,22 @@ pub enum BubbleError {
NodeNotFound(Uuid),
}
/// Core structure that maintains an in-memory DAG of provenance nodes and a
/// persistent `DoltGraph` backend.
///
/// * **WHAT** - Holds:
///   - `persistence`: The Dolt-based storage implementation.
///   - `dag`: A `petgraph::DiGraph` where node payloads are UUIDs and edges are
///     `ProvenanceEdge`s.
///   - `node_map`: A fast lookup map from node UUID to the corresponding
///     `petgraph::NodeIndex`.
/// * **HOW** - Provides methods to create nodes (`record_node`) and edges
///   (`record_link`). These methods insert into the in-memory graph and then
///   persist the data in Dolt tables using simple `INSERT` statements followed by
///   a `commit`.
/// * **WHY** - Separating the transient in-memory representation from durable
///   storage gives fast runtime queries while guaranteeing that the provenance
///   graph can survive process restarts and be inspected via Dolt tools.
pub struct ProvenanceGraph {
persistence: DoltGraph,
dag: DiGraph<Uuid, ProvenanceEdge>,
@@ -30,6 +91,13 @@ pub struct ProvenanceGraph {
}
impl ProvenanceGraph {
/// Creates a new `ProvenanceGraph` backed by a pre-initialised `DoltGraph`.
///
/// * **WHAT** - Returns a fresh instance with empty in-memory structures.
/// * **HOW** - Stores the supplied `persistence` and constructs a new `DiGraph`
///   and empty `HashMap`.
/// * **WHY** - Allows callers to decide where the Dolt repository lives (e.g.
///   a temporary directory for tests or a permanent location for production).
pub fn new(persistence: DoltGraph) -> Self {
Self {
persistence,
@@ -38,33 +106,73 @@ impl ProvenanceGraph {
}
}
/// Records a provenance node identified by `id` together with its address.
///
/// * **WHAT** - Registers the node in the in-memory graph (`dag` + `node_map`)
///   and writes a row to the `provenance_nodes` Dolt table.
/// * **HOW** - Returns immediately if `id` is already known. Otherwise the
///   table is created on a best-effort basis, the row is inserted and
///   committed, and only then is the node added to the in-memory structures.
/// * **WHY** - Persisting before touching in-memory state means a storage
///   failure leaves `node_map` untouched, so a retry attempts the write again
///   instead of being skipped as "already recorded". Storing the address
///   (typically a UCXL address) allows later resolution of where the artifact
///   originated.
///
/// # Errors
///
/// Propagates any failure from `insert_node` or `commit` as a `BubbleError`.
pub fn record_node(&mut self, id: Uuid, address: &str) -> Result<(), BubbleError> {
    // Idempotent: a node that is already tracked is not persisted twice.
    if self.node_map.contains_key(&id) {
        return Ok(());
    }

    // Ensure the backing table exists; the result is discarded on purpose so
    // that an "already exists" failure does not abort the call.
    self.persistence
        .create_table(
            "provenance_nodes",
            "id VARCHAR(255) PRIMARY KEY, address TEXT",
        )
        .ok();

    let data = serde_json::json!({
        "id": id.to_string(),
        "address": address,
    });
    self.persistence.insert_node("provenance_nodes", data)?;
    self.persistence
        .commit(&format!("Record provenance node: {}", id))?;

    // Update the in-memory graph only after the write has succeeded.
    let idx = self.dag.add_node(id);
    self.node_map.insert(id, idx);
    Ok(())
}
pub fn record_link(&mut self, source: Uuid, target: Uuid, edge: ProvenanceEdge) -> Result<(), BubbleError> {
let source_idx = *self.node_map.get(&source).ok_or(BubbleError::NodeNotFound(source))?;
let target_idx = *self.node_map.get(&target).ok_or(BubbleError::NodeNotFound(target))?;
/// Records a directed edge between two existing nodes.
///
/// * **WHAT** - Adds an edge of type `ProvenanceEdge` to the DAG and stores a
///   corresponding row in the `provenance_links` Dolt table.
/// * **HOW** - Retrieves the `NodeIndex` for each UUID (erroring with
///   `BubbleError::NodeNotFound` if missing), adds the edge to `dag`, then
///   inserts a row containing a new link UUID, source/target IDs and the edge
///   type as a string.
/// * **WHY** - Persisting links allows the full provenance graph to be queried
///   outside the process, while the in-memory representation keeps runtime
///   operations cheap.
pub fn record_link(
&mut self,
source: Uuid,
target: Uuid,
edge: ProvenanceEdge,
) -> Result<(), BubbleError> {
let source_idx = *self
.node_map
.get(&source)
.ok_or(BubbleError::NodeNotFound(source))?;
let target_idx = *self
.node_map
.get(&target)
.ok_or(BubbleError::NodeNotFound(target))?;
self.dag.add_edge(source_idx, target_idx, edge);
// Persist
self.persistence.create_table("provenance_links", "id VARCHAR(255) PRIMARY KEY, source_id TEXT, target_id TEXT, edge_type TEXT")
// Ensure the links table exists.
self.persistence
.create_table(
"provenance_links",
"id VARCHAR(255) PRIMARY KEY, source_id TEXT, target_id TEXT, edge_type TEXT",
)
.ok();
let link_id = Uuid::new_v4();
@@ -72,12 +180,11 @@ impl ProvenanceGraph {
"id": link_id.to_string(),
"source_id": source.to_string(),
"target_id": target.to_string(),
"edge_type": format!("{:?}", edge)
"edge_type": format!("{:?}", edge),
});
self.persistence.insert_node("provenance_links", data)?;
self.persistence.commit(&format!("Record provenance link: {} -> {}", source, target))?;
self.persistence
.commit(&format!("Record provenance link: {} -> {}", source, target))?;
Ok(())
}
}
@@ -96,9 +203,15 @@ mod tests {
let id1 = Uuid::new_v4();
let id2 = Uuid::new_v4();
graph.record_node(id1, "ucxl://agent:1@proj:task/#/file1.txt").unwrap();
graph.record_node(id2, "ucxl://agent:1@proj:task/#/file2.txt").unwrap();
graph
.record_node(id1, "ucxl://agent:1@proj:task/#/file1.txt")
.unwrap();
graph
.record_node(id2, "ucxl://agent:1@proj:task/#/file2.txt")
.unwrap();
graph.record_link(id1, id2, ProvenanceEdge::DerivedFrom).unwrap();
graph
.record_link(id1, id2, ProvenanceEdge::DerivedFrom)
.unwrap();
}
}