Docs: Comprehensive inline rustdoc and architectural summary PDF

This commit is contained in:
anthonyrawlins
2026-03-03 18:05:53 +11:00
parent cc03616918
commit 0f28e4b669
2932 changed files with 14552 additions and 74 deletions

1
UCXL/.serena/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/cache

126
UCXL/.serena/project.yml Normal file
View File

@@ -0,0 +1,126 @@
# the name by which the project can be referenced within Serena
project_name: "UCXL"
# list of languages for which language servers are started; choose from:
# al bash clojure cpp csharp
# csharp_omnisharp dart elixir elm erlang
# fortran fsharp go groovy haskell
# java julia kotlin lua markdown
# matlab nix pascal perl php
# php_phpactor powershell python python_jedi r
# rego ruby ruby_solargraph rust scala
# swift terraform toml typescript typescript_vts
# vue yaml zig
# (This list may be outdated. For the current list, see values of Language enum here:
# https://github.com/oraios/serena/blob/main/src/solidlsp/ls_config.py
# For some languages, there are alternative language servers, e.g. csharp_omnisharp, ruby_solargraph.)
# Note:
# - For C, use cpp
# - For JavaScript, use typescript
# - For Free Pascal/Lazarus, use pascal
# Special requirements:
# Some languages require additional setup/installations.
# See here for details: https://oraios.github.io/serena/01-about/020_programming-languages.html#language-servers
# When using multiple languages, the first language server that supports a given file will be used for that file.
# The first language is the default language and the respective language server will be used as a fallback.
# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored.
languages:
- rust
# the encoding used by text files in the project
# For a list of possible encodings, see https://docs.python.org/3.11/library/codecs.html#standard-encodings
encoding: "utf-8"
# The language backend to use for this project.
# If not set, the global setting from serena_config.yml is used.
# Valid values: LSP, JetBrains
# Note: the backend is fixed at startup. If a project with a different backend
# is activated post-init, an error will be returned.
language_backend:
# whether to use project's .gitignore files to ignore files
ignore_all_files_in_gitignore: true
# list of additional paths to ignore in this project.
# Same syntax as gitignore, so you can use * and **.
# Note: global ignored_paths from serena_config.yml are also applied additively.
ignored_paths: []
# whether the project is in read-only mode
# If set to true, all editing tools will be disabled and attempts to use them will result in an error
# Added on 2025-04-18
read_only: false
# list of tool names to exclude. We recommend not excluding any tools, see the readme for more details.
# Below is the complete list of tools for convenience.
# To make sure you have the latest list of tools, and to view their descriptions,
# execute `uv run scripts/print_tool_overview.py`.
#
# * `activate_project`: Activates a project by name.
# * `check_onboarding_performed`: Checks whether project onboarding was already performed.
# * `create_text_file`: Creates/overwrites a file in the project directory.
# * `delete_lines`: Deletes a range of lines within a file.
# * `delete_memory`: Deletes a memory from Serena's project-specific memory store.
# * `execute_shell_command`: Executes a shell command.
# * `find_referencing_code_snippets`: Finds code snippets in which the symbol at the given location is referenced.
# * `find_referencing_symbols`: Finds symbols that reference the symbol at the given location (optionally filtered by type).
# * `find_symbol`: Performs a global (or local) search for symbols with/containing a given name/substring (optionally filtered by type).
# * `get_current_config`: Prints the current configuration of the agent, including the active and available projects, tools, contexts, and modes.
# * `get_symbols_overview`: Gets an overview of the top-level symbols defined in a given file.
# * `initial_instructions`: Gets the initial instructions for the current project.
# Should only be used in settings where the system prompt cannot be set,
# e.g. in clients you have no control over, like Claude Desktop.
# * `insert_after_symbol`: Inserts content after the end of the definition of a given symbol.
# * `insert_at_line`: Inserts content at a given line in a file.
# * `insert_before_symbol`: Inserts content before the beginning of the definition of a given symbol.
# * `list_dir`: Lists files and directories in the given directory (optionally with recursion).
# * `list_memories`: Lists memories in Serena's project-specific memory store.
# * `onboarding`: Performs onboarding (identifying the project structure and essential tasks, e.g. for testing or building).
# * `prepare_for_new_conversation`: Provides instructions for preparing for a new conversation (in order to continue with the necessary context).
# * `read_file`: Reads a file within the project directory.
# * `read_memory`: Reads the memory with the given name from Serena's project-specific memory store.
# * `remove_project`: Removes a project from the Serena configuration.
# * `replace_lines`: Replaces a range of lines within a file with new content.
# * `replace_symbol_body`: Replaces the full definition of a symbol.
# * `restart_language_server`: Restarts the language server, may be necessary when edits not through Serena happen.
# * `search_for_pattern`: Performs a search for a pattern in the project.
# * `summarize_changes`: Provides instructions for summarizing the changes made to the codebase.
# * `switch_modes`: Activates modes by providing a list of their names
# * `think_about_collected_information`: Thinking tool for pondering the completeness of collected information.
# * `think_about_task_adherence`: Thinking tool for determining whether the agent is still on track with the current task.
# * `think_about_whether_you_are_done`: Thinking tool for determining whether the task is truly completed.
# * `write_memory`: Writes a named memory (for future reference) to Serena's project-specific memory store.
excluded_tools: []
# list of tools to include that would otherwise be disabled (particularly optional tools that are disabled by default)
included_optional_tools: []
# fixed set of tools to use as the base tool set (if non-empty), replacing Serena's default set of tools.
# This cannot be combined with non-empty excluded_tools or included_optional_tools.
fixed_tools: []
# list of mode names that are always to be included in the set of active modes
# The full set of modes to be activated is base_modes + default_modes.
# If the setting is undefined, the base_modes from the global configuration (serena_config.yml) apply.
# Otherwise, this setting overrides the global configuration.
# Set this to [] to disable base modes for this project.
# Set this to a list of mode names to always include the respective modes for this project.
base_modes:
# list of mode names that are to be activated by default.
# The full set of modes to be activated is base_modes + default_modes.
# If the setting is undefined, the default_modes from the global configuration (serena_config.yml) apply.
# Otherwise, this overrides the setting from the global configuration (serena_config.yml).
# This setting can, in turn, be overridden by CLI parameters (--mode).
default_modes:
# initial prompt for the project. It will always be given to the LLM upon activating the project
# (contrary to the memories, which are loaded on demand).
initial_prompt: ""
# time budget (seconds) per tool call for the retrieval of additional symbol information
# such as docstrings or parameter information.
# This overrides the corresponding setting in the global configuration; see the documentation there.
# If null or missing, use the setting from the global configuration.
symbol_info_budget:

View File

@@ -1,4 +1,9 @@
// UCXL Core Data Structures
//! UCXL core data structures and utilities.
//!
//! This module provides the fundamental types used throughout the CHORUS
//! system for addressing resources (UCXL addresses), handling temporal axes,
//! and storing lightweight metadata. The implementation is deliberately
//! lightweight and in-memory to keep the core fast and dependency-free.
pub mod watcher;
@@ -7,18 +12,41 @@ use std::fmt;
use std::str::FromStr;
/// Represents the temporal axis in a UCXL address.
///
/// **What**: An enumeration of the three supported temporal positions —
/// present, past, and future — each represented by a symbolic string in the
/// address format.
///
/// **How**: The enum derives `Debug`, `PartialEq`, `Eq`, `Clone`, and `Copy`
/// for ergonomic usage. Conversions to and from strings are provided via the
/// `FromStr` and `fmt::Display` implementations.
///
/// **Why**: Temporal axes enable UCXL to refer to data at different points in
/// time (e.g. versioned resources). The simple three-state model matches the
/// CHORUS architectural decision to keep addressing lightweight while still
/// supporting historical and speculative queries.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum TemporalAxis {
/// Present (`"#"`) — the current version of a resource.
Present,
/// Past (`"~~"`) — a historical snapshot of a resource.
Past,
/// Future (`"^^"`) — a speculative or planned version of a resource.
Future,
}
impl FromStr for TemporalAxis {
type Err = String;
/// Parses a temporal axis token from its textual representation.
///
/// **What**: Accepts "#", "~~" or "^^" and maps them to the corresponding
/// enum variant.
///
/// **How**: A simple `match` statement is used; an error string is
/// returned for any unrecognised token.
///
/// **Why**: Centralises validation of temporal markers used throughout the
/// address parsing logic, ensuring consistency.
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"#" => Ok(TemporalAxis::Present),
@@ -30,6 +58,15 @@ impl FromStr for TemporalAxis {
}
impl fmt::Display for TemporalAxis {
/// Formats the temporal axis back to its string token.
///
/// **What**: Returns "#", "~~" or "^^" depending on the variant.
///
/// **How**: Matches on `self` and writes the corresponding string to the
/// formatter.
///
/// **Why**: Required for serialising a `UCXLAddress` back to its textual
/// representation.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s = match self {
TemporalAxis::Present => "#",
@@ -41,18 +78,48 @@ impl fmt::Display for TemporalAxis {
}
/// Represents a parsed UCXL address.
///
/// **What**: Holds the components extracted from a UCXL URI — the agent, an
/// optional role, the project identifier, task name, temporal axis, and the
/// resource path within the project.
///
/// **How**: The struct is constructed via the `FromStr` implementation which
/// validates the scheme, splits the address into its constituent parts and
/// populates the fields. The `Display` implementation performs the inverse
/// operation.
///
/// **Why**: UCXL addresses are the primary routing mechanism inside CHORUS.
/// Encapsulating them in a dedicated type provides type safety and makes it
/// easy to work with address components in the rest of the codebase.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct UCXLAddress {
/// The identifier of the agent (e.g., a user or system component).
pub agent: String,
/// Optional role associated with the agent (e.g., "admin").
pub role: Option<String>,
/// The project namespace this address belongs to.
pub project: String,
/// The specific task within the project.
pub task: String,
/// Temporal axis indicating present, past or future.
pub temporal: TemporalAxis,
/// Path to the resource relative to the project root.
pub path: String,
}
impl FromStr for UCXLAddress {
type Err = String;
/// Parses a full UCXL address string into a `UCXLAddress` value.
///
/// **What**: Validates the scheme (`ucxl://`), extracts the agent, optional
/// role, project, task, temporal axis and the trailing resource path.
///
/// **How**: The implementation performs a series of `split` operations,
/// handling optional components and converting the temporal token via
/// `TemporalAxis::from_str`. Errors are surfaced as descriptive strings.
///
/// **Why**: Centralises address parsing logic, ensuring that all parts of
/// the system interpret UCXL URIs consistently.
fn from_str(address: &str) -> Result<Self, Self::Err> {
// Ensure the scheme is correct
let scheme_split: Vec<&str> = address.splitn(2, "://").collect();
@@ -102,6 +169,16 @@ impl FromStr for UCXLAddress {
}
impl fmt::Display for UCXLAddress {
/// Serialises the address back to its canonical string form.
///
/// **What**: Constructs a `ucxl://` URI including optional role and path.
///
/// **How**: Conditionally inserts the role component, then formats the
/// project, task, temporal token and optional path using standard `write!`
/// semantics.
///
/// **Why**: Needed when emitting addresses (e.g., logging events or
/// generating links) so that external tools can consume them.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let role_part = if let Some(r) = &self.role {
format!(":{}", r)
@@ -125,21 +202,51 @@ impl fmt::Display for UCXLAddress {
}
}
/// Trait defining a simple key-value metadata store that maps a file path to
/// a metadata string.
///
/// **What**: Provides read, write and removal operations for associating a
/// string of metadata with a filesystem path.
///
/// **How**: The trait abstracts over concrete storage implementations —
/// currently an in-memory `HashMap` — allowing callers to depend on the trait
/// rather than a specific type.
///
/// **Why**: CHORUS needs a lightweight way to attach auxiliary information to
/// files without persisting to a database; the trait makes it easy to swap in a
/// persistent backend later if required.
pub trait MetadataStore {
/// Retrieves the metadata for `path` if it exists.
fn get(&self, path: &str) -> Option<&String>;
/// Stores `metadata` for `path`, overwriting any existing value.
fn set(&mut self, path: &str, metadata: String);
/// Removes the metadata entry for `path`, returning the old value if any.
///
/// The provided default implementation is a no-op: it ignores `path`,
/// removes nothing and always returns `None`. Implementors that support
/// removal must override this method.
fn remove(&mut self, path: &str) -> Option<String> {
None
}
}
/// In-memory implementation of `MetadataStore` backed by a `HashMap`.
///
/// **What**: Holds metadata in a hash map where the key is the file path and
/// the value is the metadata string.
///
/// **How**: Wraps a private `HashMap<String, String>`; instances are created
/// with the `new` constructor.
///
/// **Why**: Offers a lightweight, dependency-free store suitable for unit
/// tests and simple scenarios. Callers that depend on the `MetadataStore`
/// trait can later switch to a persistent store without changes.
pub struct InMemoryMetadataStore {
map: HashMap<String, String>,
}
impl InMemoryMetadataStore {
/// Creates a fresh, empty `InMemoryMetadataStore`.
///
/// **What**: Returns a struct with an empty internal map.
///
/// **How**: Calls `HashMap::new`.
///
/// **Why**: Convenience constructor for callers.
pub fn new() -> Self {
InMemoryMetadataStore {
map: HashMap::new(),

View File

@@ -1,20 +1,63 @@
//! UCXL filesystem watcher.
//!
//! This module provides a thin wrapper around the `notify` crate to watch a
//! directory (or "project") for filesystem events. When a change is detected,
//! the watcher attempts to construct a corresponding `UCXLAddress` using a
//! simple heuristic and logs the event. This is primarily used by CHORUS for
//! reactive workflows such as automatically updating metadata when files are
//! added, modified or removed.
use notify::{Config, RecommendedWatcher, RecursiveMode, Watcher};
use std::path::Path;
use std::sync::mpsc::channel;
use crate::{UCXLAddress, TemporalAxis};
use crate::UCXLAddress;
use std::str::FromStr;
/// Represents a watcher rooted at a specific base path.
///
/// **What**: Holds the base path that the watcher monitors. The path is kept
/// exactly as it was supplied to `new`; it is not canonicalised or made
/// absolute.
///
/// **How**: The path is stored as a `PathBuf`. The watcher is created via the
/// `new` constructor which accepts any type that can be referenced as a `Path`.
/// The actual filesystem watching is started separately by `watch_loop`.
///
/// **Why**: Encapsulating the watcher logic in a dedicated struct makes it easy
/// to instantiate multiple independent watchers and keeps the public API tidy.
pub struct UCXLWatcher {
base_path: std::path::PathBuf,
}
impl UCXLWatcher {
/// Builds a `UCXLWatcher` rooted at `path`.
///
/// **What**: Generic over `AsRef<Path>`, so callers can pass a `&str`,
/// `String`, `&Path`, or `PathBuf`.
///
/// **How**: Borrows the argument as a `Path` and copies it into an owned
/// `PathBuf`. The path is stored as given, without any normalisation.
///
/// **Why**: Convenience constructor for creating a watcher over a project
/// directory.
pub fn new<P: AsRef<Path>>(path: P) -> Self {
    // Take an owned copy so the watcher does not borrow from the caller.
    let base_path = path.as_ref().to_path_buf();
    Self { base_path }
}
/// Starts the watch loop, blocking indefinitely while handling events.
///
/// **What**: Sets up a channel, creates a `RecommendedWatcher`, and begins
/// watching the `base_path` recursively. For each incoming event, it
/// attempts to map the filesystem path to a UCXL address and prints a log.
///
/// **How**: Uses the `notify` crate's event API. The heuristic address
/// format is `ucxl://system:watcher@local:filesystem/#/<relative_path>`.
/// It parses this string with `UCXLAddress::from_str` and logs the result.
/// Errors from parsing are ignored (they simply aren't printed).
///
/// **Why**: Provides a simple, observable bridge between raw filesystem
/// changes and the UCXL addressing scheme, allowing other components to react
/// to changes using a uniform identifier.
pub fn watch_loop(&self) -> Result<(), Box<dyn std::error::Error>> {
let (tx, rx) = channel();
@@ -29,8 +72,11 @@ impl UCXLWatcher {
for path in event.paths {
if let Some(rel_path) = path.strip_prefix(&self.base_path).ok() {
let rel_str = rel_path.to_string_lossy();
// Attempt a heuristic address mapping: ucxl://system:watcher@local:filesystem/#/path
let addr_str = format!("ucxl://system:watcher@local:filesystem/#/{}", rel_str);
// Heuristic address mapping: ucxl://system:watcher@local:filesystem/#/path
let addr_str = format!(
"ucxl://system:watcher@local:filesystem/#/{}",
rel_str
);
if let Ok(addr) = UCXLAddress::from_str(&addr_str) {
println!("[UCXL EVENT] {:?} -> {}", event.kind, addr);
}