From 8f19eaab25845f1c40b87fb3a220aba85a5a43f6 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Tue, 29 Jul 2025 12:13:16 +1000 Subject: [PATCH] Initial HCFS project scaffold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🚀 Generated with Claude Code - Project plan and architecture documentation - Python package structure with core modules - API design and basic usage examples - Development environment configuration - Literature review and research foundation Ready for Phase 1 implementation. Co-Authored-By: Claude --- .gitignore | 74 ++++++++ LICENSE | 21 +++ PROJECT_PLAN.md | 129 +++++++++++++ README.md | 55 ++++++ docs/API_REFERENCE.md | 314 ++++++++++++++++++++++++++++++++ docs/ARCHITECTURE.md | 146 +++++++++++++++ examples/basic_usage.py | 195 ++++++++++++++++++++ pyproject.toml | 129 +++++++++++++ src/hcfs/__init__.py | 20 ++ src/hcfs/api/__init__.py | 13 ++ src/hcfs/filesystem/__init__.py | 11 ++ src/hcfs/indexing/__init__.py | 13 ++ src/hcfs/storage/__init__.py | 14 ++ src/hcfs/utils/__init__.py | 16 ++ 14 files changed, 1150 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 PROJECT_PLAN.md create mode 100644 README.md create mode 100644 docs/API_REFERENCE.md create mode 100644 docs/ARCHITECTURE.md create mode 100644 examples/basic_usage.py create mode 100644 pyproject.toml create mode 100644 src/hcfs/__init__.py create mode 100644 src/hcfs/api/__init__.py create mode 100644 src/hcfs/filesystem/__init__.py create mode 100644 src/hcfs/indexing/__init__.py create mode 100644 src/hcfs/storage/__init__.py create mode 100644 src/hcfs/utils/__init__.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7d87425 --- /dev/null +++ b/.gitignore @@ -0,0 +1,74 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg 
+MANIFEST + +# Virtual environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Database files +*.db +*.sqlite +*.sqlite3 + +# Logs +*.log +logs/ + +# Test coverage +.coverage +.pytest_cache/ +htmlcov/ + +# FUSE mount points +/mount/ +/mnt/ + +# Context data +/data/ +/context_store/ + +# Temporary files +/tmp/ +*.tmp +*.temp \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ebf7a6a --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 HCFS Project + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/PROJECT_PLAN.md b/PROJECT_PLAN.md new file mode 100644 index 0000000..76d5f9d --- /dev/null +++ b/PROJECT_PLAN.md @@ -0,0 +1,129 @@ +--- + +# PROJECT\_PLAN.md + +## 📘 Title + +**Context‑Aware Hierarchical Context File System (HCFS)**: Unifying file system paths with context blobs for agentic AI cognition + +--- + +## 1. Research Motivation & Literature Review 🧠 + +* **Semantic and context‑aware file systems**: Gifford et al. (1991) proposed early semantic file systems using directory paths as semantic queries ([Wikipedia][1]). Later work explored tag‑based and ontology‑based systems for richer metadata and context-aware retrieval ([Wikipedia][1]). +* **LLM‑driven semantic FS (LSFS)**: The recent ICLR 2025 LSFS proposes integrating vector DBs and semantic indexing into a filesystem that supports prompt-driven file operations and semantic rollback ([OpenReview][2]). +* **Path-structure embeddings**: Recent Transformer-based work shows file paths can be modeled as sequences for semantic anomaly detection—capturing hierarchy and semantics in embeddings ([MDPI][3]). +* **Context modeling frameworks**: Ontology-driven context models (e.g. OWL/SOCAM) support representing, reasoning about, and sharing context hierarchically ([arXiv][4]). + +HCFS merges these prior insights into a hybrid design: directory navigation defines the query scope, backed by semantic context blobs in a DB, enabling agentic systems to zoom in/out contextually. + +--- + +## 2. Objectives & Scope + +1. Design a **virtual filesystem layer** that maps hierarchical paths to context blobs. +2. Build a **context storage system** (DB) to hold context units, versioned and indexed. +3. Define **APIs and syscalls** for agents to: + + * navigate context scope (`cd`‑style), + * request context retrieval, + * push new context, + * merge or inherit context across levels. +4. 
Enable **decentralized context sharing**: agents can publish updates at path-nodes; peer agents subscribe by tree‑paths. +5. Prototype on a controlled dataset / toy project tree to validate: + + * latency, + * correct retrieval, + * hierarchical inheritance semantics. + +--- + +## 3. System Architecture Overview + +### 3.1 Virtual Filesystem Layer (e.g. FUSE or AIOS integration) + +* Presents standard POSIX (or AIOS‑style) tree structure. +* Each directory or file node has metadata pointers into context‑blob IDs. +* Traversal (e.g., `ls`, `cd`) triggers context lookup for that path. + +### 3.2 Context Database Backend + +* Two possible designs: + + * **Relational/SQLite + versioned tables**: simple, transactional, supports hierarchical inheritance via path parent pointers. + * **Graph DB (e.g., Neo4j)**: ideal for multi-parent contexts, symlink-like context inheritance. +* Context blobs include: + + * blob ID, + * path(s) bound, + * timestamp/version, author/agent, + * embedding or semantic tags, + * content or summary. + +### 3.3 Indexing & Embeddings + +* Generate embeddings of context blobs for semantic similarity retrieval (e.g. for context folding) ([OpenReview][5], [OpenReview][2], [MDPI][3]). +* Use combination of BM25 + embedding ranking (contextual retrieval) for accurate scope-based retrieval ([TECHCOMMUNITY.MICROSOFT.COM][6]). + +### 3.4 API & Syscalls + +* `context_cd(path)`: sets current context pointer. +* `context_get(depth=N)`: retrieves cumulative context from current node up N levels. +* `context_push(path, blob)`: insert new context tied to a path. +* `context_list(path)`: lists available context blobs at that path. +* `context_subscribe(path)`: agent registers to receive updates at a path. + +--- + +## 4. 
Project Timeline & Milestones + +| Phase | Duration | Deliverables | +| ---------------------------------------------- | -------- | -------------------------------------------------------- | +| **Phase 0: Research & Design** | 2 weeks | Literature review doc, architecture draft | +| **Phase 1: Prototype FS layer** | 4 weeks | Minimal FUSE‑based path→context mapping, CLI demo | +| **Phase 2: Backend DB & storage** | 4 weeks | Context blob storage, path linkage, versioning | +| **Phase 3: Embedding & retrieval integration** | 3 weeks | Embeddings + BM25 hybrid ranking for context relevance | +| **Phase 4: API/Syscall layer scripting** | 3 weeks | Python (or AIOS) service exposing navigation + push APIs | +| **Phase 5: Agent integration & simulation** | 3 weeks | Dummy AI agents navigating, querying, publishing context | +| **Phase 6: Evaluation & refinement** | 2 weeks | Usability, latency, retrieval relevance metrics | +| **Phase 7: Write-up & publication** | 2 weeks | Report, possible poster/paper submission | + +--- + +## 5. Risks & Alternatives + +* **Semantic vs hierarchical mismatch**: Flat tag systems (e.g. Tagsistant) offer semantic tagging but lack path-based inheritance ([research.ijcaonline.org][7], [OpenReview][2], [Wikipedia][1], [arXiv][8], [Anthropic][9], [OpenReview][5], [Wikipedia][10]). +* **Context explosion**: many small blobs flooding the DB—mitigate via summarization/folding. +* **Performance trade‑offs**: FS lookups must stay acceptable; versioned graph storage might slow down. Consider caching snapshots at each node. + +--- + +## 6. 
Peer‑Reviewed References + +* David Gifford et al., *Semantic file systems*, ACM Operating Systems Review (1991) ([Wikipedia][1]) +* ICLR 2025: *From Commands to Prompts: LLM-based Semantic File System for AIOS* (LSFS) ([OpenReview][2]) +* Xiaoyu et al., *Transformer-based path sequence modeling for file‑path anomaly detection* ([MDPI][3]) +* Tao Gu et al., *Ontology‑based Context Model in Intelligent Environments* (SOCAM) ([arXiv][4]) + +--- + +## 7. Next Steps + +* Review cited literature, build an annotated bibliography. +* Choose backend stack (SQLite vs graph DB) and test embedding pipeline. +* Begin Phase 1: implementing minimal context‑aware FS mock. + +--- + +Possible follow‑ups: a proof‑of‑concept scaffold (for example, in Python + SQLite + FUSE), or a full proposal for funding or conference submission. + +[1]: https://en.wikipedia.org/wiki/Semantic_file_system?utm_source=chatgpt.com "Semantic file system" +[2]: https://openreview.net/forum?id=2G021ZqUEZ&utm_source=chatgpt.com "From Commands to Prompts: LLM-based Semantic File System for AIOS" +[3]: https://www.mdpi.com/2079-8954/13/6/403?utm_source=chatgpt.com "Effective Context-Aware File Path Embeddings for Anomaly Detection - MDPI" +[4]: https://arxiv.org/abs/2003.05055?utm_source=chatgpt.com "An Ontology-based Context Model in Intelligent Environments" +[5]: https://openreview.net/pdf?id=2G021ZqUEZ&utm_source=chatgpt.com "From Commands to Prompts: LLM-based Semantic File System for AIOS - OpenReview" +[6]: https://techcommunity.microsoft.com/blog/azure-ai-services-blog/building-a-contextual-retrieval-system-for-improving-rag-accuracy/4271924?utm_source=chatgpt.com "Building a Contextual Retrieval System for Improving RAG Accuracy" +[7]: https://research.ijcaonline.org/volume121/number1/pxc3904433.pdf?utm_source=chatgpt.com "A Survey on Different File System Approach - research.ijcaonline.org" +[8]: https://arxiv.org/abs/1909.10123?utm_source=chatgpt.com "SplitFS: Reducing Software Overhead 
in File Systems for Persistent Memory" +[9]: https://www.anthropic.com/news/contextual-retrieval?utm_source=chatgpt.com "Introducing Contextual Retrieval \ Anthropic" +[10]: https://en.wikipedia.org/wiki/Tagsistant?utm_source=chatgpt.com "Tagsistant" diff --git a/README.md b/README.md new file mode 100644 index 0000000..919f870 --- /dev/null +++ b/README.md @@ -0,0 +1,55 @@ +# HCFS - Hierarchical Context File System + +**Context-Aware Hierarchical Context File System (HCFS)**: Unifying file system paths with context blobs for agentic AI cognition + +## Overview + +HCFS is a virtual filesystem layer that maps hierarchical paths to context blobs, enabling agentic AI systems to navigate and share context in a structured, hierarchical manner. It combines the intuitive nature of file system navigation with semantic context storage and retrieval. + +## Key Features + +- **Virtual Filesystem Layer**: Standard POSIX-style directory navigation backed by context blobs +- **Context Database Backend**: Versioned context storage with hierarchical inheritance +- **Semantic Indexing**: Embeddings and BM25 hybrid ranking for context relevance +- **Agent APIs**: Syscall-style APIs for context navigation, retrieval, and publishing +- **Decentralized Context Sharing**: Agents can publish/subscribe to context updates by path + +## Quick Start + +This project is currently in the planning and research phase. See [PROJECT_PLAN.md](PROJECT_PLAN.md) for detailed architecture and implementation timeline. 
+ +## Architecture + +``` +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Agent APIs │ │ Virtual FS │ │ Context DB │ +│ │ │ Layer (FUSE) │ │ Backend │ +│ • context_cd() │◄──►│ │◄──►│ │ +│ • context_get() │ │ /project/ │ │ • Blob storage │ +│ • context_push()│ │ /project/src/ │ │ • Versioning │ +│ • context_list()│ │ /project/docs/ │ │ • Embeddings │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ +``` + +## Development Phases + +- **Phase 0**: Research & Design (2 weeks) +- **Phase 1**: Prototype FS layer (4 weeks) +- **Phase 2**: Backend DB & storage (4 weeks) +- **Phase 3**: Embedding & retrieval integration (3 weeks) +- **Phase 4**: API/Syscall layer scripting (3 weeks) +- **Phase 5**: Agent integration & simulation (3 weeks) +- **Phase 6**: Evaluation & refinement (2 weeks) +- **Phase 7**: Write-up & publication (2 weeks) + +## Contributing + +This project is in early development. See [PROJECT_PLAN.md](PROJECT_PLAN.md) for detailed specifications and implementation roadmap. + +## License + +MIT License - see [LICENSE](LICENSE) for details. + +## Research Context + +HCFS builds upon research in semantic file systems, LLM-driven semantic filesystems (LSFS), path-structure embeddings, and context modeling frameworks. See the literature review section in [PROJECT_PLAN.md](PROJECT_PLAN.md) for full references. \ No newline at end of file diff --git a/docs/API_REFERENCE.md b/docs/API_REFERENCE.md new file mode 100644 index 0000000..7205ea9 --- /dev/null +++ b/docs/API_REFERENCE.md @@ -0,0 +1,314 @@ +# HCFS API Reference + +## Overview + +The HCFS API provides syscall-style functions for agents to navigate, query, and manipulate hierarchical context. All operations are designed to be familiar to agents accustomed to filesystem operations. + +## Core Navigation API + +### `context_cd(path: str) -> bool` + +Change the current context directory. Similar to the shell `cd` command. 
+ +**Parameters:** +- `path`: Target path (absolute or relative) + +**Returns:** +- `True` if path exists and is accessible +- `False` if path does not exist or is inaccessible + +**Example:** +```python +# Navigate to project root +success = context_cd("/project") + +# Navigate to subdirectory +success = context_cd("src/models") + +# Navigate up one level +success = context_cd("..") +``` + +### `context_pwd() -> str` + +Get the current context working directory. + +**Returns:** +- Current absolute path as string + +**Example:** +```python +current_path = context_pwd() +# Returns: "/project/src/models" +``` + +### `context_ls(path: str = None) -> List[str]` + +List available context paths at the specified directory. + +**Parameters:** +- `path`: Directory path (default: current directory) + +**Returns:** +- List of child path names + +**Example:** +```python +# List current directory +paths = context_ls() +# Returns: ["models/", "utils/", "tests/", "README.md"] + +# List specific directory +paths = context_ls("/project/docs") +# Returns: ["api/", "architecture/", "examples/"] +``` + +## Context Retrieval API + +### `context_get(depth: int = 1, filters: dict = None) -> List[ContextBlob]` + +Retrieve context blobs from current path and optionally parent paths. + +**Parameters:** +- `depth`: How many levels up the hierarchy to include (1 = current only) +- `filters`: Optional filters (content_type, author, date_range, etc.) + +**Returns:** +- List of `ContextBlob` objects ordered by relevance + +**Example:** +```python +# Get context from current path only +context = context_get(depth=1) + +# Get context from current path and 2 parent levels +context = context_get(depth=3) + +# Get context with filters +context = context_get( + depth=2, + filters={ + "content_type": "documentation", + "author": "claude", + "since": "2025-01-01" + } +) +``` + +### `context_search(query: str, scope: str = None) -> List[ContextBlob]` + +Perform semantic search across context blobs. 
+ +**Parameters:** +- `query`: Search query string +- `scope`: Path scope to limit search (default: current path and children) + +**Returns:** +- List of `ContextBlob` objects ranked by relevance + +**Example:** +```python +# Search within current scope +results = context_search("error handling patterns") + +# Search within specific scope +results = context_search( + "database connection", + scope="/project/src/models" +) +``` + +## Context Manipulation API + +### `context_push(path: str, blob: ContextBlob) -> str` + +Add or update context at the specified path. + +**Parameters:** +- `path`: Target path for the context +- `blob`: ContextBlob object containing content and metadata + +**Returns:** +- Blob ID of the created/updated context + +**Example:** +```python +from hcfs.api import ContextBlob + +# Create new context blob +blob = ContextBlob( + content="This module handles user authentication", + content_type="documentation", + tags=["auth", "security", "users"], + metadata={"priority": "high"} +) + +# Push to specific path +blob_id = context_push("/project/src/auth.py", blob) +``` + +### `context_delete(path: str, blob_id: str = None) -> bool` + +Delete context blob(s) at the specified path. + +**Parameters:** +- `path`: Target path +- `blob_id`: Specific blob ID (if None, deletes all blobs at path) + +**Returns:** +- `True` if deletion successful +- `False` if path/blob not found or permission denied + +**Example:** +```python +# Delete specific blob +success = context_delete("/project/src/auth.py", blob_id) + +# Delete all context at path +success = context_delete("/project/old_module/") +``` + +### `context_update(blob_id: str, updates: dict) -> bool` + +Update an existing context blob. 
+ +**Parameters:** +- `blob_id`: ID of blob to update +- `updates`: Dictionary of fields to update + +**Returns:** +- `True` if update successful +- `False` if blob not found or permission denied + +**Example:** +```python +# Update blob content and tags +success = context_update(blob_id, { + "content": "Updated documentation with new examples", + "tags": ["auth", "security", "users", "examples"] +}) +``` + +## Subscription API + +### `context_subscribe(path: str, callback: Callable, filters: dict = None) -> str` + +Subscribe to context changes at the specified path. + +**Parameters:** +- `path`: Path to monitor +- `callback`: Function to call when changes occur +- `filters`: Optional filters for subscription + +**Returns:** +- Subscription ID string + +**Example:** +```python +def on_context_change(event): + print(f"Context changed at {event.path}: {event.change_type}") + +# Subscribe to changes in current directory +sub_id = context_subscribe( + "/project/src/", + callback=on_context_change, + filters={"change_type": ["create", "update"]} +) +``` + +### `context_unsubscribe(subscription_id: str) -> bool` + +Cancel a context subscription. + +**Parameters:** +- `subscription_id`: ID returned from `context_subscribe` + +**Returns:** +- `True` if unsubscribe successful +- `False` if subscription not found + +**Example:** +```python +success = context_unsubscribe(sub_id) +``` + +## Data Models + +### ContextBlob + +```python +class ContextBlob: + id: str # Unique blob identifier + content: str # Main content text + content_type: str # Type: "code", "documentation", "config", etc. 
+ tags: List[str] # Searchable tags + metadata: Dict[str, Any] # Additional metadata + author: str # Creator identifier + created_at: datetime # Creation timestamp + updated_at: datetime # Last update timestamp + version: int # Version number + parent_version: Optional[str] # Parent blob ID if forked +``` + +### ContextPath + +```python +class ContextPath: + path: str # Full path string + components: List[str] # Path components + depth: int # Depth from root + is_absolute: bool # True if absolute path + exists: bool # True if path has context +``` + +### ContextQuery + +```python +class ContextQuery: + query: str # Search query + filters: Dict[str, Any] # Search filters + scope: str # Search scope path + limit: int # Max results + offset: int # Results offset + sort_by: str # Sort field + sort_order: str # "asc" or "desc" +``` + +## Error Handling + +All API functions raise specific exceptions for different error conditions: + +- `PathNotFoundError`: Path does not exist +- `PermissionDeniedError`: Insufficient permissions +- `InvalidPathError`: Malformed path syntax +- `ContextNotFoundError`: Context blob not found +- `ValidationError`: Invalid data provided +- `StorageError`: Backend storage error + +**Example:** +```python +from hcfs.api import PathNotFoundError, PermissionDeniedError + +try: + context = context_get(depth=2) +except PathNotFoundError: + print("Current path has no context") +except PermissionDeniedError: + print("Access denied to context") +``` + +## Configuration + +API behavior can be configured via `HCFSConfig`: + +```python +from hcfs.utils import HCFSConfig + +config = HCFSConfig( + max_depth=10, # Maximum traversal depth + cache_size=1000, # LRU cache size + default_content_type="text", # Default blob content type + enable_versioning=True, # Enable blob versioning + subscription_timeout=300 # Subscription timeout (seconds) +) +``` \ No newline at end of file diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 
0000000..a60cf48 --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,146 @@ +# HCFS Architecture + +## Overview + +The Hierarchical Context File System (HCFS) is designed as a layered architecture that bridges filesystem navigation with semantic context storage and retrieval. + +## System Components + +### 1. Virtual Filesystem Layer (`src/hcfs/filesystem/`) + +The virtual filesystem presents a standard POSIX-like directory structure backed by context blobs rather than traditional files. + +**Key Components:** +- **HCFSFilesystem**: Main filesystem interface +- **HCFSFuseOperations**: FUSE-based filesystem operations (readdir, getattr, etc.) + +**Responsibilities:** +- Present hierarchical path structure to agents +- Map filesystem operations to context queries +- Handle path-based navigation (`cd`, `ls`, etc.) +- Maintain current context scope per session + +### 2. Storage Backend (`src/hcfs/storage/`) + +The storage layer manages persistent context blob storage with versioning and metadata. + +**Key Components:** +- **ContextStorage**: Abstract storage interface +- **SQLiteBackend**: SQLite-based implementation +- **StoredContextBlob**: Storage data models +- **ContextMetadata**: Metadata and versioning + +**Responsibilities:** +- Persist context blobs with versioning +- Store path-to-context mappings +- Manage hierarchical inheritance relationships +- Provide ACID guarantees for context operations + +### 3. Indexing & Semantic Search (`src/hcfs/indexing/`) + +The indexing layer provides semantic search capabilities over context blobs. + +**Key Components:** +- **EmbeddingEngine**: Generate embeddings for context content +- **SemanticSearch**: Vector similarity search +- **HybridRanker**: Combines BM25 + embedding scores + +**Responsibilities:** +- Generate and store embeddings for context blobs +- Provide semantic similarity search +- Rank results by relevance (hybrid BM25 + vector) +- Support context folding and summarization + +### 4. 
Agent API (`src/hcfs/api/`) + +The API layer exposes syscall-style functions for agent interaction. + +**Key Components:** +- **ContextAPI**: Main agent-facing API +- **ContextBlob**: Context data models +- **ContextPath**: Path representation +- **ContextQuery**: Query models + +**Core API Functions:** +```python +# Navigation +context_cd(path: str) -> bool +context_pwd() -> str + +# Retrieval +context_get(depth: int = 1) -> List[ContextBlob] +context_list(path: str = None) -> List[str] + +# Manipulation +context_push(path: str, blob: ContextBlob) -> str +context_delete(path: str, blob_id: str) -> bool + +# Subscription +context_subscribe(path: str, callback: Callable) -> str +context_unsubscribe(subscription_id: str) -> bool +``` + +### 5. Utilities (`src/hcfs/utils/`) + +Common utilities and configuration management. + +**Key Components:** +- **HCFSConfig**: Configuration management +- **path_utils**: Path manipulation utilities +- **logging**: Structured logging + +## Data Flow + +``` +Agent → ContextAPI → HCFSFilesystem → ContextStorage + ↓ ↓ + SemanticSearch ← EmbeddingEngine +``` + +### Example: Context Retrieval + +1. Agent calls `context_cd("/project/src/")` +2. ContextAPI validates path and sets current scope +3. HCFSFilesystem updates virtual directory state +4. Agent calls `context_get(depth=2)` +5. ContextAPI queries ContextStorage for context at `/project/src/` and `/project/` +6. SemanticSearch ranks and filters results +7. Merged context returned to agent + +### Example: Context Publishing + +1. Agent calls `context_push("/project/src/module.py", blob)` +2. ContextAPI validates blob and path +3. EmbeddingEngine generates embeddings for blob content +4. ContextStorage persists blob with versioning +5. 
Subscription notifications sent to interested agents + +## Hierarchical Inheritance + +Context blobs inherit from parent paths using configurable strategies: + +- **Append**: Child context appends to parent context +- **Override**: Child context overrides parent context +- **Merge**: Intelligent merging based on content type +- **Isolate**: No inheritance, child context standalone + +## Concurrency & Consistency + +- **Read Scalability**: Multiple agents can read simultaneously +- **Write Coordination**: Optimistic locking with conflict resolution +- **Versioning**: All context changes create new versions +- **Subscription**: Pub/sub notifications for context changes + +## Performance Considerations + +- **Caching**: LRU cache for frequently accessed contexts +- **Lazy Loading**: Context blobs loaded on-demand +- **Batch Operations**: Bulk context operations for efficiency +- **Index Optimization**: Separate indices for path, content, and metadata queries + +## Security Model + +- **Path Permissions**: ACL-based access control per path +- **Agent Authentication**: Token-based agent identification +- **Content Validation**: Schema validation for context blobs +- **Audit Logging**: All context operations logged for accountability \ No newline at end of file diff --git a/examples/basic_usage.py b/examples/basic_usage.py new file mode 100644 index 0000000..b27504e --- /dev/null +++ b/examples/basic_usage.py @@ -0,0 +1,195 @@ +#!/usr/bin/env python3 +""" +HCFS Basic Usage Example + +This example demonstrates the core HCFS API for context navigation, +storage, and retrieval. +""" + +import asyncio +from datetime import datetime +from hcfs.api import ContextAPI, ContextBlob + + +async def basic_example(): + """Basic HCFS usage example.""" + + # Initialize HCFS API + api = ContextAPI() + + print("=== HCFS Basic Usage Example ===\n") + + # 1. Navigation + print("1. 
Navigation") + print(f"Current path: {api.context_pwd()}") + + # Create a project structure + await api.context_cd("/") + print(f"Changed to root: {api.context_pwd()}") + + await api.context_cd("/project") + print(f"Changed to project: {api.context_pwd()}") + + # 2. Creating context + print("\n2. Creating Context") + + # Add project-level context + project_context = ContextBlob( + content="AI-powered web application for task automation", + content_type="documentation", + tags=["project", "ai", "automation"], + metadata={ + "priority": "high", + "team": "ai-research", + "status": "active" + } + ) + + blob_id = await api.context_push("/project", project_context) + print(f"Created project context: {blob_id}") + + # Add source code context + src_context = ContextBlob( + content="Main application source code. Uses FastAPI for REST API, SQLAlchemy for ORM, and Pydantic for data validation.", + content_type="code", + tags=["fastapi", "sqlalchemy", "pydantic", "backend"], + metadata={ + "language": "python", + "framework": "fastapi" + } + ) + + await api.context_cd("/project/src") + blob_id = await api.context_push("/project/src", src_context) + print(f"Created src context: {blob_id}") + + # Add specific module context + auth_context = ContextBlob( + content="Authentication module using JWT tokens. Implements login, logout, and token refresh endpoints.", + content_type="code", + tags=["auth", "jwt", "security", "endpoints"], + metadata={ + "module": "auth.py", + "security_level": "high" + } + ) + + blob_id = await api.context_push("/project/src/auth.py", auth_context) + print(f"Created auth module context: {blob_id}") + + # 3. Context retrieval + print("\n3. 
Context Retrieval") + + # Get context from current path + await api.context_cd("/project/src/auth.py") + current_context = await api.context_get(depth=1) + print(f"Current path context ({len(current_context)} blobs):") + for blob in current_context: + print(f" - {blob.content_type}: {blob.content[:50]}...") + + # Get hierarchical context (current + parents) + hierarchical_context = await api.context_get(depth=3) + print(f"\nHierarchical context ({len(hierarchical_context)} blobs):") + for blob in hierarchical_context: + print(f" - {blob.content_type}: {blob.content[:50]}...") + + # 4. Semantic search + print("\n4. Semantic Search") + + # Search for authentication-related context + search_results = await api.context_search("authentication security") + print(f"Search results for 'authentication security' ({len(search_results)} results):") + for blob in search_results: + print(f" - Score: {blob.metadata.get('search_score', 'N/A')}") + print(f" Content: {blob.content[:60]}...") + print(f" Tags: {', '.join(blob.tags)}") + + # Search within specific scope + api_results = await api.context_search("API endpoints", scope="/project/src") + print(f"\nAPI-related results in /project/src ({len(api_results)} results):") + for blob in api_results: + print(f" - {blob.content[:50]}...") + + # 5. Directory listing + print("\n5. Directory Listing") + + await api.context_cd("/project") + paths = await api.context_ls() + print(f"Paths in /project: {paths}") + + await api.context_cd("/") + all_paths = await api.context_ls() + print(f"All top-level paths: {all_paths}") + + # 6. Context updates + print("\n6. 
Context Updates") + + # Update existing context + success = await api.context_update(blob_id, { + "content": "Enhanced authentication module with 2FA support and rate limiting", + "tags": ["auth", "jwt", "security", "endpoints", "2fa", "rate-limiting"], + "metadata": { + "module": "auth.py", + "security_level": "high", + "features": ["2fa", "rate_limiting"] + } + }) + print(f"Updated auth context: {success}") + + # Verify update + updated_context = await api.context_get(depth=1) + for blob in updated_context: + if blob.id == blob_id: + print(f"Updated content: {blob.content[:60]}...") + print(f"New tags: {', '.join(blob.tags)}") + + print("\n=== Example Complete ===") + + +async def subscription_example(): + """Example of context subscription for real-time updates.""" + + print("\n=== Subscription Example ===") + + api = ContextAPI() + + # Define callback for context changes + def on_context_change(event): + print(f"Context change detected:") + print(f" Path: {event.path}") + print(f" Type: {event.change_type}") + print(f" Blob ID: {event.blob_id}") + print(f" Timestamp: {event.timestamp}") + + # Subscribe to changes in project directory + sub_id = await api.context_subscribe( + "/project", + callback=on_context_change, + filters={"change_type": ["create", "update"]} + ) + print(f"Subscribed to /project changes: {sub_id}") + + # Simulate context changes + await asyncio.sleep(1) + + new_context = ContextBlob( + content="New feature implementation notes", + content_type="documentation", + tags=["feature", "implementation"], + metadata={"status": "draft"} + ) + + await api.context_push("/project/new_feature", new_context) + await asyncio.sleep(1) # Allow subscription callback to fire + + # Unsubscribe + success = await api.context_unsubscribe(sub_id) + print(f"Unsubscribed: {success}") + + +if __name__ == "__main__": + # Run basic example + asyncio.run(basic_example()) + + # Run subscription example + asyncio.run(subscription_example()) \ No newline at end of file 
diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7b83e21 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,129 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "hcfs" +version = "0.1.0" +description = "Context-Aware Hierarchical Context File System for agentic AI cognition" +readme = "README.md" +license = {file = "LICENSE"} +authors = [ + {name = "HCFS Project", email = "hcfs@deepblack.cloud"} +] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: System :: Filesystems", +] +dependencies = [ + "fuse-python>=1.0.0", + "sqlalchemy>=1.4.0", + "sentence-transformers>=2.0.0", + "numpy>=1.21.0", + "pydantic>=1.8.0", + "fastapi>=0.68.0", + "uvicorn>=0.15.0", + "python-multipart>=0.0.5", + "aiofiles>=0.7.0", + "click>=8.0.0", +] +requires-python = ">=3.8" + +[project.optional-dependencies] +dev = [ + "pytest>=6.0.0", + "pytest-asyncio>=0.18.0", + "pytest-cov>=2.12.0", + "black>=21.0.0", + "isort>=5.9.0", + "flake8>=3.9.0", + "mypy>=0.910", + "pre-commit>=2.15.0", +] +docs = [ + "sphinx>=4.0.0", + "sphinx-rtd-theme>=0.5.0", + "myst-parser>=0.15.0", +] +neo4j = [ + "neo4j>=4.4.0", +] +benchmark = [ + "memory-profiler>=0.60.0", + "psutil>=5.8.0", + "matplotlib>=3.4.0", +] + +[project.urls] +Homepage = "https://github.com/anthonyrawlins/hcfs" +Documentation = "https://hcfs.readthedocs.io" +Repository = "https://github.com/anthonyrawlins/hcfs" +"Bug Tracker" = 
"https://github.com/anthonyrawlins/hcfs/issues" + +[project.scripts] +hcfs = "hcfs.cli:main" + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.setuptools.package-data] +hcfs = ["py.typed"] + +[tool.black] +line-length = 88 +target-version = ["py38", "py39", "py310", "py311", "py312"] + +[tool.isort] +profile = "black" +line_length = 88 + +[tool.mypy] +python_version = "3.8" +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = true +disallow_incomplete_defs = true +check_untyped_defs = true +disallow_untyped_decorators = true +no_implicit_optional = true +warn_redundant_casts = true +warn_unused_ignores = true +warn_no_return = true +warn_unreachable = true +strict_equality = true + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = ["test_*.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] +addopts = "--cov=hcfs --cov-report=term-missing --cov-report=html" + +[tool.coverage.run] +source = ["src/hcfs"] +omit = ["*/tests/*", "*/test_*"] + +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "def __repr__", + "if self.debug:", + "if settings.DEBUG", + "raise AssertionError", + "raise NotImplementedError", + "if 0:", + "if __name__ == .__main__.:", + "class .*\\bProtocol\\):", + "@(abc\\.)?abstractmethod", +] \ No newline at end of file diff --git a/src/hcfs/__init__.py b/src/hcfs/__init__.py new file mode 100644 index 0000000..2fdbda7 --- /dev/null +++ b/src/hcfs/__init__.py @@ -0,0 +1,20 @@ +""" +HCFS - Hierarchical Context File System + +A virtual filesystem layer that maps hierarchical paths to context blobs +for agentic AI cognition. 
+""" + +__version__ = "0.1.0" +__author__ = "HCFS Project" +__email__ = "hcfs@deepblack.cloud" + +from .api import ContextAPI +from .filesystem import HCFSFilesystem +from .storage import ContextStorage + +__all__ = [ + "ContextAPI", + "HCFSFilesystem", + "ContextStorage", +] \ No newline at end of file diff --git a/src/hcfs/api/__init__.py b/src/hcfs/api/__init__.py new file mode 100644 index 0000000..6e1b681 --- /dev/null +++ b/src/hcfs/api/__init__.py @@ -0,0 +1,13 @@ +""" +HCFS API module - Agent-facing APIs for context navigation and manipulation. +""" + +from .context_api import ContextAPI +from .models import ContextBlob, ContextPath, ContextQuery + +__all__ = [ + "ContextAPI", + "ContextBlob", + "ContextPath", + "ContextQuery", +] \ No newline at end of file diff --git a/src/hcfs/filesystem/__init__.py b/src/hcfs/filesystem/__init__.py new file mode 100644 index 0000000..cfb3e70 --- /dev/null +++ b/src/hcfs/filesystem/__init__.py @@ -0,0 +1,11 @@ +""" +HCFS Filesystem module - Virtual filesystem layer using FUSE. +""" + +from .hcfs_filesystem import HCFSFilesystem +from .fuse_operations import HCFSFuseOperations + +__all__ = [ + "HCFSFilesystem", + "HCFSFuseOperations", +] \ No newline at end of file diff --git a/src/hcfs/indexing/__init__.py b/src/hcfs/indexing/__init__.py new file mode 100644 index 0000000..1a9d300 --- /dev/null +++ b/src/hcfs/indexing/__init__.py @@ -0,0 +1,13 @@ +""" +HCFS Indexing module - Semantic indexing and retrieval for context blobs. +""" + +from .embedding_engine import EmbeddingEngine +from .semantic_search import SemanticSearch +from .hybrid_ranker import HybridRanker + +__all__ = [ + "EmbeddingEngine", + "SemanticSearch", + "HybridRanker", +] \ No newline at end of file diff --git a/src/hcfs/storage/__init__.py b/src/hcfs/storage/__init__.py new file mode 100644 index 0000000..ef721ef --- /dev/null +++ b/src/hcfs/storage/__init__.py @@ -0,0 +1,14 @@ +""" +HCFS Storage module - Context blob storage and retrieval backend. 
+""" + +from .context_storage import ContextStorage +from .sqlite_backend import SQLiteBackend +from .models import StoredContextBlob, ContextMetadata + +__all__ = [ + "ContextStorage", + "SQLiteBackend", + "StoredContextBlob", + "ContextMetadata", +] \ No newline at end of file diff --git a/src/hcfs/utils/__init__.py b/src/hcfs/utils/__init__.py new file mode 100644 index 0000000..c1213d7 --- /dev/null +++ b/src/hcfs/utils/__init__.py @@ -0,0 +1,16 @@ +""" +HCFS Utils module - Common utilities and helper functions. +""" + +from .path_utils import normalize_path, is_valid_context_path, split_path +from .config import HCFSConfig, load_config +from .logging import get_logger + +__all__ = [ + "normalize_path", + "is_valid_context_path", + "split_path", + "HCFSConfig", + "load_config", + "get_logger", +] \ No newline at end of file