chrs_bubble/
lib.rs

//! # chrs-bubble
//!
//! A provenance-tracking crate that records nodes and edges in a directed acyclic
//! graph (DAG) and persists them using a Dolt-backed graph implementation.
//! The crate is deliberately small: besides the `chrs_graph` persistence layer it
//! relies on `petgraph` for the in-memory DAG, `serde`/`serde_json` for
//! serialization, `uuid` for unique identifiers and `thiserror` for ergonomic
//! error handling. It is used by higher-level components that need to capture
//! the provenance of generated artifacts (e.g. files, messages, or results) and
//! later query that history.
//!
//! The public API is organised around three concepts:
//! * **ProvenanceEdge** – The type of relationship between two nodes.
//! * **BubbleError** – Errors that can occur when interacting with the underlying
//!   Dolt graph or when a node cannot be found.
//! * **ProvenanceGraph** – The façade that holds an in-memory DAG and a
//!   `DoltGraph` persistence layer, exposing methods to record nodes and links.
//!
//! Each item is documented with a *WHAT*, *HOW* and *WHY* section so that users
//! can quickly understand its purpose, its implementation details and the design
//! rationale.
21use chrs_graph::{DoltGraph, GraphError};
22use petgraph::graph::{DiGraph, NodeIndex};
23use serde::{Deserialize, Serialize};
24use std::collections::HashMap;
25use thiserror::Error;
26use ucxl::UCXLAddress;
27use uuid::Uuid;
28
29/// Represents the kind of relationship between two provenance nodes.
30///
31/// * **WHAT** – An enumeration of supported edge types. Currently we support:
32///   - `DerivedFrom` – Indicates that the target was derived from the source.
33///   - `Cites` – A citation relationship.
34///   - `InfluencedBy` – Denotes influence without direct derivation.
35/// * **HOW** – Used as the edge payload in the `petgraph::DiGraph`. The enum is
36///   `#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]` so it
37///   can be serialised when persisting the graph.
38/// * **WHY** – Encoding edge semantics as a dedicated enum makes provenance
39///   queries expressive and type‑safe, while keeping the on‑disk representation
40///   simple (a stringified variant).
41#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
42pub enum ProvenanceEdge {
43    /// The target node was *derived* from the source node.
44    DerivedFrom,
45    /// The target node *cites* the source node.
46    Cites,
47    /// The target node was *influenced* by the source node.
48    InfluencedBy,
49}
50
51/// Errors that can arise when working with a `ProvenanceGraph`.
52///
53/// * **WHAT** – Enumerates possible failure modes:
54///   - Graph‑level errors (`GraphError`).
55///   - Serde JSON errors (`serde_json::Error`).
56///   - A lookup failure when a node identifier cannot be resolved.
57/// * **HOW** – Implements `std::error::Error` via the `thiserror::Error` derive
58///   macro, forwarding underlying error sources with `#[from]`.
59/// * **WHY** – A single error type simplifies error propagation for callers and
60///   retains the original context for debugging.
61#[derive(Debug, Error)]
62pub enum BubbleError {
63    #[error("Graph error: {0}")]
64    Graph(#[from] GraphError),
65    #[error("Serde error: {0}")]
66    Serde(#[from] serde_json::Error),
67    #[error("Node not found: {0}")]
68    NodeNotFound(Uuid),
69}
70
/// Core structure that maintains an in-memory graph of provenance nodes and a
/// persistent `DoltGraph` backend.
///
/// * **WHAT** – Holds:
///   - `persistence`: The Dolt-based storage implementation.
///   - `dag`: A `petgraph::DiGraph` where node payloads are UUIDs and edges are
///     `ProvenanceEdge`s.
///   - `node_map`: A lookup map from node UUID to the corresponding
///     `petgraph::NodeIndex`.
/// * **HOW** – Provides methods to create nodes (`record_node`) and edges
///   (`record_link`). These methods update the in-memory graph and store the
///   data in Dolt tables via `insert_node` followed by a `commit`.
/// * **WHY** – Separating the transient in-memory representation from durable
///   storage gives fast runtime lookups while the provenance data is also
///   written to Dolt for inspection outside the process.
///
/// NOTE(review): nothing visible in this file reloads previously persisted
/// rows into `dag`/`node_map`; a new instance always starts empty.
pub struct ProvenanceGraph {
    /// Dolt-backed store that receives every recorded node and link.
    persistence: DoltGraph,
    /// In-memory directed graph; node weight is the node UUID, edge weight is
    /// the relationship kind.
    dag: DiGraph<Uuid, ProvenanceEdge>,
    /// Maps a node UUID to its index inside `dag`.
    node_map: HashMap<Uuid, NodeIndex>,
}
92
93impl ProvenanceGraph {
94    /// Creates a new `ProvenanceGraph` backed by a pre‑initialised `DoltGraph`.
95    ///
96    /// * **WHAT** – Returns a fresh instance with empty in‑memory structures.
97    /// * **HOW** – Stores the supplied `persistence` and constructs a new `DiGraph`
98    ///   and empty `HashMap`.
99    /// * **WHY** – Allows callers to decide where the Dolt repository lives (e.g.
100    ///   a temporary directory for tests or a permanent location for production).
101    pub fn new(persistence: DoltGraph) -> Self {
102        Self {
103            persistence,
104            dag: DiGraph::new(),
105            node_map: HashMap::new(),
106        }
107    }
108
109    /// Records a provenance node with a unique `Uuid` and an associated address.
110    ///
111    /// * **WHAT** – Persists the node both in‑memory (`dag` + `node_map`) and in a
112    ///   Dolt table called `provenance_nodes`.
113    /// * **HOW** – If the node does not already exist, it is added to the DAG and a
114    ///   row is inserted via `persistence.insert_node`. A commit is performed with a
115    ///   descriptive message.
116    /// * **WHY** – Storing the address (typically a UCXL address) allows later
117    ///   resolution of where the artifact originated.
118    pub fn record_node(&mut self, id: Uuid, address: &str) -> Result<(), BubbleError> {
119        if !self.node_map.contains_key(&id) {
120            let idx = self.dag.add_node(id);
121            self.node_map.insert(id, idx);
122
123            // Ensure the backing table exists – ignore errors if it already does.
124            self.persistence
125                .create_table(
126                    "provenance_nodes",
127                    "id VARCHAR(255) PRIMARY KEY, address TEXT",
128                )
129                .ok();
130
131            let data = serde_json::json!({
132                "id": id.to_string(),
133                "address": address,
134            });
135            self.persistence.insert_node("provenance_nodes", data)?;
136            self.persistence
137                .commit(&format!("Record provenance node: {}", id))?;
138        }
139        Ok(())
140    }
141
142    /// Records a directed edge between two existing nodes.
143    ///
144    /// * **WHAT** – Adds an edge of type `ProvenanceEdge` to the DAG and stores a
145    ///   corresponding row in the `provenance_links` Dolt table.
146    /// * **HOW** – Retrieves the `NodeIndex` for each UUID (erroring with
147    ///   `BubbleError::NodeNotFound` if missing), adds the edge to `dag`, then
148    ///   inserts a row containing a new link UUID, source/target IDs and the edge
149    ///   type as a string.
150    /// * **WHY** – Persisting links allows the full provenance graph to be queried
151    ///   outside the process, while the in‑memory representation keeps runtime
152    ///   operations cheap.
153    pub fn record_link(
154        &mut self,
155        source: Uuid,
156        target: Uuid,
157        edge: ProvenanceEdge,
158    ) -> Result<(), BubbleError> {
159        let source_idx = *self
160            .node_map
161            .get(&source)
162            .ok_or(BubbleError::NodeNotFound(source))?;
163        let target_idx = *self
164            .node_map
165            .get(&target)
166            .ok_or(BubbleError::NodeNotFound(target))?;
167
168        self.dag.add_edge(source_idx, target_idx, edge);
169
170        // Ensure the links table exists.
171        self.persistence
172            .create_table(
173                "provenance_links",
174                "id VARCHAR(255) PRIMARY KEY, source_id TEXT, target_id TEXT, edge_type TEXT",
175            )
176            .ok();
177
178        let link_id = Uuid::new_v4();
179        let data = serde_json::json!({
180            "id": link_id.to_string(),
181            "source_id": source.to_string(),
182            "target_id": target.to_string(),
183            "edge_type": format!("{:?}", edge),
184        });
185        self.persistence.insert_node("provenance_links", data)?;
186        self.persistence
187            .commit(&format!("Record provenance link: {} -> {}", source, target))?;
188        Ok(())
189    }
190}
191
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a graph backed by a fresh Dolt repository in a temporary
    /// directory. The `TempDir` is returned alongside the graph so the
    /// directory is not deleted while the graph is still in use.
    fn new_graph() -> (TempDir, ProvenanceGraph) {
        let dir = TempDir::new().unwrap();
        let persistence = DoltGraph::init(dir.path()).expect("dolt init failed");
        (dir, ProvenanceGraph::new(persistence))
    }

    /// Two nodes plus one link end up as exactly two nodes and one
    /// `DerivedFrom` edge in memory; re-recording a node is a no-op.
    #[test]
    fn test_provenance_dag() {
        let (_dir, mut graph) = new_graph();

        let id1 = Uuid::new_v4();
        let id2 = Uuid::new_v4();

        graph
            .record_node(id1, "ucxl://agent:1@proj:task/#/file1.txt")
            .unwrap();
        graph
            .record_node(id2, "ucxl://agent:1@proj:task/#/file2.txt")
            .unwrap();
        assert_eq!(graph.dag.node_count(), 2);

        // Recording an already known id must not create a second node.
        graph
            .record_node(id1, "ucxl://agent:1@proj:task/#/file1.txt")
            .unwrap();
        assert_eq!(graph.dag.node_count(), 2);

        graph
            .record_link(id1, id2, ProvenanceEdge::DerivedFrom)
            .unwrap();
        assert_eq!(graph.dag.edge_count(), 1);

        // The edge must run from id1 to id2 and carry the requested kind.
        let stored = graph
            .dag
            .find_edge(graph.node_map[&id1], graph.node_map[&id2])
            .and_then(|e| graph.dag.edge_weight(e));
        assert_eq!(stored, Some(&ProvenanceEdge::DerivedFrom));
    }

    /// Linking to a node that was never recorded fails with `NodeNotFound`
    /// for that id and leaves the in-memory graph without edges.
    #[test]
    fn test_record_link_unknown_node() {
        let (_dir, mut graph) = new_graph();

        let known = Uuid::new_v4();
        let missing = Uuid::new_v4();
        graph
            .record_node(known, "ucxl://agent:1@proj:task/#/file1.txt")
            .unwrap();

        let err = graph
            .record_link(known, missing, ProvenanceEdge::Cites)
            .unwrap_err();
        assert!(matches!(err, BubbleError::NodeNotFound(id) if id == missing));
        assert_eq!(graph.dag.edge_count(), 0);
    }
}
217}