Docs: Comprehensive inline rustdoc and architectural summary PDF
This commit is contained in:
@@ -1,18 +1,63 @@
|
||||
//! # chrs-bubble
//!
//! A provenance‑tracking crate that records nodes and edges in a directed acyclic
//! graph (DAG) and persists them using a Dolt‑backed graph implementation.
//! The crate is deliberately small – besides the sibling crates `chrs_graph`
//! (Dolt persistence) and `ucxl` (addresses) it only pulls in `petgraph` for the
//! in‑memory DAG, `serde`/`serde_json` for serialization, `uuid` for unique
//! identifiers and `thiserror` for ergonomic error handling. It is used by
//! higher‑level components that need to capture the provenance of generated
//! artifacts (e.g. files, messages, or results) and later query that history.
//!
//! The public API is organised around three concepts:
//! * **ProvenanceEdge** – The type of relationship between two nodes.
//! * **BubbleError** – Errors that can occur when interacting with the underlying
//!   Dolt graph or when a node cannot be found.
//! * **ProvenanceGraph** – The façade that holds an in‑memory DAG and a
//!   `DoltGraph` persistence layer, exposing methods to record nodes and links.
//!
//! Each item is documented with a *WHAT*, *HOW* and *WHY* section so that users can
//! quickly understand its purpose, its implementation details and the design
//! rationale.
|
||||
use chrs_graph::{DoltGraph, GraphError};
|
||||
use ucxl::UCXLAddress;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
use uuid::Uuid;
|
||||
use petgraph::graph::{DiGraph, NodeIndex};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use thiserror::Error;
|
||||
use ucxl::UCXLAddress;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Represents the kind of relationship between two provenance nodes.
|
||||
///
|
||||
/// * **WHAT** – An enumeration of supported edge types. Currently we support:
|
||||
/// - `DerivedFrom` – Indicates that the target was derived from the source.
|
||||
/// - `Cites` – A citation relationship.
|
||||
/// - `InfluencedBy` – Denotes influence without direct derivation.
|
||||
/// * **HOW** – Used as the edge payload in the `petgraph::DiGraph`. The enum is
|
||||
/// `#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]` so it
|
||||
/// can be serialised when persisting the graph.
|
||||
/// * **WHY** – Encoding edge semantics as a dedicated enum makes provenance
|
||||
/// queries expressive and type‑safe, while keeping the on‑disk representation
|
||||
/// simple (a stringified variant).
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum ProvenanceEdge {
|
||||
/// The target node was *derived* from the source node.
|
||||
DerivedFrom,
|
||||
/// The target node *cites* the source node.
|
||||
Cites,
|
||||
/// The target node was *influenced* by the source node.
|
||||
InfluencedBy,
|
||||
}
|
||||
|
||||
/// Errors that can arise when working with a `ProvenanceGraph`.
|
||||
///
|
||||
/// * **WHAT** – Enumerates possible failure modes:
|
||||
/// - Graph‑level errors (`GraphError`).
|
||||
/// - Serde JSON errors (`serde_json::Error`).
|
||||
/// - A lookup failure when a node identifier cannot be resolved.
|
||||
/// * **HOW** – Implements `std::error::Error` via the `thiserror::Error` derive
|
||||
/// macro, forwarding underlying error sources with `#[from]`.
|
||||
/// * **WHY** – A single error type simplifies error propagation for callers and
|
||||
/// retains the original context for debugging.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum BubbleError {
|
||||
#[error("Graph error: {0}")]
|
||||
@@ -23,6 +68,22 @@ pub enum BubbleError {
|
||||
NodeNotFound(Uuid),
|
||||
}
|
||||
|
||||
/// Core structure that maintains an in‑memory DAG of provenance nodes and a
|
||||
/// persistent `DoltGraph` backend.
|
||||
///
|
||||
/// * **WHAT** – Holds:
|
||||
/// - `persistence`: The Dolt‑based storage implementation.
|
||||
/// - `dag`: A `petgraph::DiGraph` where node payloads are UUIDs and edges are
|
||||
/// `ProvenanceEdge`s.
|
||||
/// - `node_map`: A fast lookup map from node UUID to the corresponding
|
||||
/// `petgraph::NodeIndex`.
|
||||
/// * **HOW** – Provides methods to create nodes (`record_node`) and edges
|
||||
/// (`record_link`). These methods insert into the in‑memory graph and then
|
||||
/// persist the data in Dolt tables using simple `INSERT` statements followed by
|
||||
/// a `commit`.
|
||||
/// * **WHY** – Separating the transient in‑memory representation from durable
|
||||
/// storage gives fast runtime queries while guaranteeing that the provenance
|
||||
/// graph can survive process restarts and be inspected via Dolt tools.
|
||||
pub struct ProvenanceGraph {
|
||||
persistence: DoltGraph,
|
||||
dag: DiGraph<Uuid, ProvenanceEdge>,
|
||||
@@ -30,6 +91,13 @@ pub struct ProvenanceGraph {
|
||||
}
|
||||
|
||||
impl ProvenanceGraph {
|
||||
/// Creates a new `ProvenanceGraph` backed by a pre‑initialised `DoltGraph`.
|
||||
///
|
||||
/// * **WHAT** – Returns a fresh instance with empty in‑memory structures.
|
||||
/// * **HOW** – Stores the supplied `persistence` and constructs a new `DiGraph`
|
||||
/// and empty `HashMap`.
|
||||
/// * **WHY** – Allows callers to decide where the Dolt repository lives (e.g.
|
||||
/// a temporary directory for tests or a permanent location for production).
|
||||
pub fn new(persistence: DoltGraph) -> Self {
|
||||
Self {
|
||||
persistence,
|
||||
@@ -38,33 +106,73 @@ impl ProvenanceGraph {
|
||||
}
|
||||
}
|
||||
|
||||
/// Records a provenance node with a unique `Uuid` and an associated address.
|
||||
///
|
||||
/// * **WHAT** – Persists the node both in‑memory (`dag` + `node_map`) and in a
|
||||
/// Dolt table called `provenance_nodes`.
|
||||
/// * **HOW** – If the node does not already exist, it is added to the DAG and a
|
||||
/// row is inserted via `persistence.insert_node`. A commit is performed with a
|
||||
/// descriptive message.
|
||||
/// * **WHY** – Storing the address (typically a UCXL address) allows later
|
||||
/// resolution of where the artifact originated.
|
||||
pub fn record_node(&mut self, id: Uuid, address: &str) -> Result<(), BubbleError> {
|
||||
if !self.node_map.contains_key(&id) {
|
||||
let idx = self.dag.add_node(id);
|
||||
self.node_map.insert(id, idx);
|
||||
|
||||
// Persist
|
||||
self.persistence.create_table("provenance_nodes", "id VARCHAR(255) PRIMARY KEY, address TEXT")
|
||||
// Ensure the backing table exists – ignore errors if it already does.
|
||||
self.persistence
|
||||
.create_table(
|
||||
"provenance_nodes",
|
||||
"id VARCHAR(255) PRIMARY KEY, address TEXT",
|
||||
)
|
||||
.ok();
|
||||
|
||||
|
||||
let data = serde_json::json!({
|
||||
"id": id.to_string(),
|
||||
"address": address
|
||||
"address": address,
|
||||
});
|
||||
self.persistence.insert_node("provenance_nodes", data)?;
|
||||
self.persistence.commit(&format!("Record provenance node: {}", id))?;
|
||||
self.persistence
|
||||
.commit(&format!("Record provenance node: {}", id))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn record_link(&mut self, source: Uuid, target: Uuid, edge: ProvenanceEdge) -> Result<(), BubbleError> {
|
||||
let source_idx = *self.node_map.get(&source).ok_or(BubbleError::NodeNotFound(source))?;
|
||||
let target_idx = *self.node_map.get(&target).ok_or(BubbleError::NodeNotFound(target))?;
|
||||
/// Records a directed edge between two existing nodes.
|
||||
///
|
||||
/// * **WHAT** – Adds an edge of type `ProvenanceEdge` to the DAG and stores a
|
||||
/// corresponding row in the `provenance_links` Dolt table.
|
||||
/// * **HOW** – Retrieves the `NodeIndex` for each UUID (erroring with
|
||||
/// `BubbleError::NodeNotFound` if missing), adds the edge to `dag`, then
|
||||
/// inserts a row containing a new link UUID, source/target IDs and the edge
|
||||
/// type as a string.
|
||||
/// * **WHY** – Persisting links allows the full provenance graph to be queried
|
||||
/// outside the process, while the in‑memory representation keeps runtime
|
||||
/// operations cheap.
|
||||
pub fn record_link(
|
||||
&mut self,
|
||||
source: Uuid,
|
||||
target: Uuid,
|
||||
edge: ProvenanceEdge,
|
||||
) -> Result<(), BubbleError> {
|
||||
let source_idx = *self
|
||||
.node_map
|
||||
.get(&source)
|
||||
.ok_or(BubbleError::NodeNotFound(source))?;
|
||||
let target_idx = *self
|
||||
.node_map
|
||||
.get(&target)
|
||||
.ok_or(BubbleError::NodeNotFound(target))?;
|
||||
|
||||
self.dag.add_edge(source_idx, target_idx, edge);
|
||||
|
||||
// Persist
|
||||
self.persistence.create_table("provenance_links", "id VARCHAR(255) PRIMARY KEY, source_id TEXT, target_id TEXT, edge_type TEXT")
|
||||
// Ensure the links table exists.
|
||||
self.persistence
|
||||
.create_table(
|
||||
"provenance_links",
|
||||
"id VARCHAR(255) PRIMARY KEY, source_id TEXT, target_id TEXT, edge_type TEXT",
|
||||
)
|
||||
.ok();
|
||||
|
||||
let link_id = Uuid::new_v4();
|
||||
@@ -72,12 +180,11 @@ impl ProvenanceGraph {
|
||||
"id": link_id.to_string(),
|
||||
"source_id": source.to_string(),
|
||||
"target_id": target.to_string(),
|
||||
"edge_type": format!("{:?}", edge)
|
||||
"edge_type": format!("{:?}", edge),
|
||||
});
|
||||
|
||||
self.persistence.insert_node("provenance_links", data)?;
|
||||
self.persistence.commit(&format!("Record provenance link: {} -> {}", source, target))?;
|
||||
|
||||
self.persistence
|
||||
.commit(&format!("Record provenance link: {} -> {}", source, target))?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -96,9 +203,15 @@ mod tests {
|
||||
let id1 = Uuid::new_v4();
|
||||
let id2 = Uuid::new_v4();
|
||||
|
||||
graph.record_node(id1, "ucxl://agent:1@proj:task/#/file1.txt").unwrap();
|
||||
graph.record_node(id2, "ucxl://agent:1@proj:task/#/file2.txt").unwrap();
|
||||
graph
|
||||
.record_node(id1, "ucxl://agent:1@proj:task/#/file1.txt")
|
||||
.unwrap();
|
||||
graph
|
||||
.record_node(id2, "ucxl://agent:1@proj:task/#/file2.txt")
|
||||
.unwrap();
|
||||
|
||||
graph.record_link(id1, id2, ProvenanceEdge::DerivedFrom).unwrap();
|
||||
graph
|
||||
.record_link(id1, id2, ProvenanceEdge::DerivedFrom)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user