chrs_bubble/lib.rs
//! # chrs-bubble
//!
//! A provenance‑tracking crate that records nodes and edges in a directed acyclic
//! graph (DAG) and persists them using a Dolt‑backed graph implementation.
//! The crate is deliberately small – besides the `chrs_graph` persistence layer,
//! its main dependencies are `petgraph` for the in‑memory DAG, `serde` and
//! `serde_json` for serialization, `uuid` for unique identifiers and `thiserror`
//! for ergonomic error handling. It is used by higher‑level components that need
//! to capture the provenance of generated artifacts (e.g. files, messages, or
//! results) and later query that history.
//!
//! The public API is organised around three concepts:
//! * **ProvenanceEdge** – The type of relationship between two nodes.
//! * **BubbleError** – Errors that can occur when interacting with the underlying
//!   Dolt graph or when a node cannot be found.
//! * **ProvenanceGraph** – The façade that holds an in‑memory DAG and a
//!   `DoltGraph` persistence layer, exposing methods to record nodes and links.
//!
//! Each item is documented with a *WHAT*, *HOW* and *WHY* section so that users can
//! quickly understand its purpose, its implementation details and the design
//! rationale.
21use chrs_graph::{DoltGraph, GraphError};
22use petgraph::graph::{DiGraph, NodeIndex};
23use serde::{Deserialize, Serialize};
24use std::collections::HashMap;
25use thiserror::Error;
26use ucxl::UCXLAddress;
27use uuid::Uuid;
28
29/// Represents the kind of relationship between two provenance nodes.
30///
31/// * **WHAT** – An enumeration of supported edge types. Currently we support:
32/// - `DerivedFrom` – Indicates that the target was derived from the source.
33/// - `Cites` – A citation relationship.
34/// - `InfluencedBy` – Denotes influence without direct derivation.
35/// * **HOW** – Used as the edge payload in the `petgraph::DiGraph`. The enum is
36/// `#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]` so it
37/// can be serialised when persisting the graph.
38/// * **WHY** – Encoding edge semantics as a dedicated enum makes provenance
39/// queries expressive and type‑safe, while keeping the on‑disk representation
40/// simple (a stringified variant).
41#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
42pub enum ProvenanceEdge {
43 /// The target node was *derived* from the source node.
44 DerivedFrom,
45 /// The target node *cites* the source node.
46 Cites,
47 /// The target node was *influenced* by the source node.
48 InfluencedBy,
49}
50
51/// Errors that can arise when working with a `ProvenanceGraph`.
52///
53/// * **WHAT** – Enumerates possible failure modes:
54/// - Graph‑level errors (`GraphError`).
55/// - Serde JSON errors (`serde_json::Error`).
56/// - A lookup failure when a node identifier cannot be resolved.
57/// * **HOW** – Implements `std::error::Error` via the `thiserror::Error` derive
58/// macro, forwarding underlying error sources with `#[from]`.
59/// * **WHY** – A single error type simplifies error propagation for callers and
60/// retains the original context for debugging.
61#[derive(Debug, Error)]
62pub enum BubbleError {
63 #[error("Graph error: {0}")]
64 Graph(#[from] GraphError),
65 #[error("Serde error: {0}")]
66 Serde(#[from] serde_json::Error),
67 #[error("Node not found: {0}")]
68 NodeNotFound(Uuid),
69}
70
71/// Core structure that maintains an in‑memory DAG of provenance nodes and a
72/// persistent `DoltGraph` backend.
73///
74/// * **WHAT** – Holds:
75/// - `persistence`: The Dolt‑based storage implementation.
76/// - `dag`: A `petgraph::DiGraph` where node payloads are UUIDs and edges are
77/// `ProvenanceEdge`s.
78/// - `node_map`: A fast lookup map from node UUID to the corresponding
79/// `petgraph::NodeIndex`.
80/// * **HOW** – Provides methods to create nodes (`record_node`) and edges
81/// (`record_link`). These methods insert into the in‑memory graph and then
82/// persist the data in Dolt tables using simple `INSERT` statements followed by
83/// a `commit`.
84/// * **WHY** – Separating the transient in‑memory representation from durable
85/// storage gives fast runtime queries while guaranteeing that the provenance
86/// graph can survive process restarts and be inspected via Dolt tools.
87pub struct ProvenanceGraph {
88 persistence: DoltGraph,
89 dag: DiGraph<Uuid, ProvenanceEdge>,
90 node_map: HashMap<Uuid, NodeIndex>,
91}
92
93impl ProvenanceGraph {
94 /// Creates a new `ProvenanceGraph` backed by a pre‑initialised `DoltGraph`.
95 ///
96 /// * **WHAT** – Returns a fresh instance with empty in‑memory structures.
97 /// * **HOW** – Stores the supplied `persistence` and constructs a new `DiGraph`
98 /// and empty `HashMap`.
99 /// * **WHY** – Allows callers to decide where the Dolt repository lives (e.g.
100 /// a temporary directory for tests or a permanent location for production).
101 pub fn new(persistence: DoltGraph) -> Self {
102 Self {
103 persistence,
104 dag: DiGraph::new(),
105 node_map: HashMap::new(),
106 }
107 }
108
109 /// Records a provenance node with a unique `Uuid` and an associated address.
110 ///
111 /// * **WHAT** – Persists the node both in‑memory (`dag` + `node_map`) and in a
112 /// Dolt table called `provenance_nodes`.
113 /// * **HOW** – If the node does not already exist, it is added to the DAG and a
114 /// row is inserted via `persistence.insert_node`. A commit is performed with a
115 /// descriptive message.
116 /// * **WHY** – Storing the address (typically a UCXL address) allows later
117 /// resolution of where the artifact originated.
118 pub fn record_node(&mut self, id: Uuid, address: &str) -> Result<(), BubbleError> {
119 if !self.node_map.contains_key(&id) {
120 let idx = self.dag.add_node(id);
121 self.node_map.insert(id, idx);
122
123 // Ensure the backing table exists – ignore errors if it already does.
124 self.persistence
125 .create_table(
126 "provenance_nodes",
127 "id VARCHAR(255) PRIMARY KEY, address TEXT",
128 )
129 .ok();
130
131 let data = serde_json::json!({
132 "id": id.to_string(),
133 "address": address,
134 });
135 self.persistence.insert_node("provenance_nodes", data)?;
136 self.persistence
137 .commit(&format!("Record provenance node: {}", id))?;
138 }
139 Ok(())
140 }
141
142 /// Records a directed edge between two existing nodes.
143 ///
144 /// * **WHAT** – Adds an edge of type `ProvenanceEdge` to the DAG and stores a
145 /// corresponding row in the `provenance_links` Dolt table.
146 /// * **HOW** – Retrieves the `NodeIndex` for each UUID (erroring with
147 /// `BubbleError::NodeNotFound` if missing), adds the edge to `dag`, then
148 /// inserts a row containing a new link UUID, source/target IDs and the edge
149 /// type as a string.
150 /// * **WHY** – Persisting links allows the full provenance graph to be queried
151 /// outside the process, while the in‑memory representation keeps runtime
152 /// operations cheap.
153 pub fn record_link(
154 &mut self,
155 source: Uuid,
156 target: Uuid,
157 edge: ProvenanceEdge,
158 ) -> Result<(), BubbleError> {
159 let source_idx = *self
160 .node_map
161 .get(&source)
162 .ok_or(BubbleError::NodeNotFound(source))?;
163 let target_idx = *self
164 .node_map
165 .get(&target)
166 .ok_or(BubbleError::NodeNotFound(target))?;
167
168 self.dag.add_edge(source_idx, target_idx, edge);
169
170 // Ensure the links table exists.
171 self.persistence
172 .create_table(
173 "provenance_links",
174 "id VARCHAR(255) PRIMARY KEY, source_id TEXT, target_id TEXT, edge_type TEXT",
175 )
176 .ok();
177
178 let link_id = Uuid::new_v4();
179 let data = serde_json::json!({
180 "id": link_id.to_string(),
181 "source_id": source.to_string(),
182 "target_id": target.to_string(),
183 "edge_type": format!("{:?}", edge),
184 });
185 self.persistence.insert_node("provenance_links", data)?;
186 self.persistence
187 .commit(&format!("Record provenance link: {} -> {}", source, target))?;
188 Ok(())
189 }
190}
191
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a graph backed by a fresh Dolt repository inside `dir`.
    ///
    /// The `TempDir` is borrowed, not owned, so the caller keeps it alive for
    /// as long as the graph is in use.
    fn new_graph(dir: &TempDir) -> ProvenanceGraph {
        let persistence = DoltGraph::init(dir.path()).expect("dolt init failed");
        ProvenanceGraph::new(persistence)
    }

    /// Recording two nodes and one link must be reflected in the in‑memory
    /// graph, and re‑recording a known node must not duplicate it.
    #[test]
    fn test_provenance_dag() {
        let dir = TempDir::new().unwrap();
        let mut graph = new_graph(&dir);

        let id1 = Uuid::new_v4();
        let id2 = Uuid::new_v4();

        graph
            .record_node(id1, "ucxl://agent:1@proj:task/#/file1.txt")
            .unwrap();
        graph
            .record_node(id2, "ucxl://agent:1@proj:task/#/file2.txt")
            .unwrap();
        assert_eq!(graph.dag.node_count(), 2);
        assert_eq!(graph.node_map.len(), 2);

        // Recording an already-known id is a no-op.
        graph
            .record_node(id1, "ucxl://agent:1@proj:task/#/file1.txt")
            .unwrap();
        assert_eq!(graph.dag.node_count(), 2);
        assert_eq!(graph.node_map.len(), 2);

        graph
            .record_link(id1, id2, ProvenanceEdge::DerivedFrom)
            .unwrap();
        assert_eq!(graph.dag.edge_count(), 1);

        // The edge must run id1 -> id2 and carry the requested weight.
        let edge_idx = graph
            .dag
            .find_edge(graph.node_map[&id1], graph.node_map[&id2])
            .expect("edge id1 -> id2 should exist");
        assert_eq!(
            graph.dag.edge_weight(edge_idx),
            Some(&ProvenanceEdge::DerivedFrom)
        );
    }

    /// Linking to a node that was never recorded must fail with
    /// `NodeNotFound` and leave the in‑memory graph without edges.
    #[test]
    fn test_record_link_unknown_node() {
        let dir = TempDir::new().unwrap();
        let mut graph = new_graph(&dir);

        let known = Uuid::new_v4();
        let missing = Uuid::new_v4();
        graph
            .record_node(known, "ucxl://agent:1@proj:task/#/file1.txt")
            .unwrap();

        let err = graph
            .record_link(known, missing, ProvenanceEdge::Cites)
            .unwrap_err();
        assert!(matches!(err, BubbleError::NodeNotFound(id) if id == missing));

        let err = graph
            .record_link(missing, known, ProvenanceEdge::Cites)
            .unwrap_err();
        assert!(matches!(err, BubbleError::NodeNotFound(id) if id == missing));

        assert_eq!(graph.dag.edge_count(), 0);
    }
}
217}