Phase 2 build initial

2025-07-30 09:34:16 +10:00
parent 8f19eaab25
commit a6ee31f237
68 changed files with 18055 additions and 3 deletions
--- a/hcfs-python/hcfs/core/init.py
+++ b/hcfs-python/hcfs/core/init.py
@@ -0,0 +1 @@
+"""Core HCFS components."""
--- a/hcfs-python/hcfs/core/context_db.py
+++ b/hcfs-python/hcfs/core/context_db.py
@@ -0,0 +1,148 @@
+"""
+Context Database - Storage and retrieval of context blobs.
+"""
+
+from datetime import datetime
+from typing import List, Optional, Dict, Any
+from dataclasses import dataclass
+from pathlib import Path
+
+from sqlalchemy import create_engine, Column, Integer, String, DateTime, Text, Float
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker, Session
+
+
+Base = declarative_base()
+
+
+class ContextBlob(Base):
+    """Database model for context blobs.
+
+    Each row of the ``context_blobs`` table holds one piece of context text
+    attached to a path, plus optional summary, authorship and embedding data.
+    """
+    
+    __tablename__ = "context_blobs"
+    
+    id = Column(Integer, primary_key=True)
+    # Indexed: the ContextDatabase queries in this module filter on path.
+    path = Column(String(512), nullable=False, index=True)
+    content = Column(Text, nullable=False)
+    summary = Column(Text)
+    # Name of the model that produced embedding_vector (nullable).
+    embedding_model = Column(String(100))
+    embedding_vector = Column(Text)  # JSON serialized vector
+    author = Column(String(100))
+    # datetime.utcnow yields naive datetimes expressed in UTC.
+    created_at = Column(DateTime, default=datetime.utcnow)
+    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+    # Starts at 1; ContextDatabase.update_context increments it on every update.
+    version = Column(Integer, default=1)
+
+
+@dataclass
+class Context:
+    """Context data structure.
+
+    Plain, ORM-independent value object mirroring the user-facing columns of
+    ``ContextBlob`` (the embedding columns are not included).
+    """
+    # None for a context that has not been stored yet; store_context ignores it.
+    id: Optional[int]
+    path: str
+    content: str
+    summary: Optional[str] = None
+    author: Optional[str] = None
+    # The timestamps are not passed to the database by store_context; they are
+    # filled in when a Context is built from a stored row.
+    created_at: Optional[datetime] = None
+    updated_at: Optional[datetime] = None
+    version: int = 1
+
+
+class ContextDatabase:
+    """Main interface for context storage and retrieval.
+
+    Wraps a SQLite database (through SQLAlchemy) that stores ``ContextBlob``
+    rows and hands them back to callers as plain ``Context`` objects.
+    """
+
+    def __init__(self, db_path: str = "hcfs_context.db"):
+        """Open the SQLite database at ``db_path`` and create missing tables."""
+        self.db_path = db_path
+        self.engine = create_engine(f"sqlite:///{db_path}")
+        Base.metadata.create_all(self.engine)
+        self.SessionLocal = sessionmaker(bind=self.engine)
+
+    def get_session(self) -> Session:
+        """Get database session."""
+        return self.SessionLocal()
+
+    @staticmethod
+    def _to_context(blob: ContextBlob) -> Context:
+        """Copy the user-facing columns of an ORM row into a ``Context``."""
+        return Context(
+            id=blob.id,
+            path=blob.path,
+            content=blob.content,
+            summary=blob.summary,
+            author=blob.author,
+            created_at=blob.created_at,
+            updated_at=blob.updated_at,
+            version=blob.version,
+        )
+
+    def store_context(self, context: Context) -> int:
+        """Store a context blob and return its ID.
+
+        ``context.id`` and the timestamps are ignored; the database assigns
+        them.
+        """
+        with self.get_session() as session:
+            blob = ContextBlob(
+                path=context.path,
+                content=context.content,
+                summary=context.summary,
+                author=context.author,
+                version=context.version,
+            )
+            session.add(blob)
+            session.commit()
+            # Reload so the generated primary key is available.
+            session.refresh(blob)
+            return blob.id
+
+    def get_context_by_path(self, path: str, depth: int = 1) -> List[Context]:
+        """Retrieve contexts for a path and optionally parent paths.
+
+        Args:
+            path: Path whose contexts are wanted.
+            depth: Number of parent levels to include in addition to ``path``
+                itself; the walk stops early once the root is reached.
+
+        Returns:
+            Contexts ordered from ``path`` outwards to its ancestors; within
+            each level the newest context comes first.
+        """
+        # Context paths are virtual, '/'-separated paths, so they must not be
+        # rewritten with the host OS separator (pathlib.Path uses backslashes
+        # on Windows, which would never match the stored values).
+        from pathlib import PurePosixPath
+
+        contexts: List[Context] = []
+        current_path = PurePosixPath(path)
+        relative_root = PurePosixPath(".")
+
+        with self.get_session() as session:
+            for _ in range(depth + 1):
+                # The top of a relative path (".") is looked up as "/".
+                search_path = "/" if current_path == relative_root else str(current_path)
+
+                blobs = session.query(ContextBlob).filter(
+                    ContextBlob.path == search_path
+                ).order_by(ContextBlob.created_at.desc()).all()
+                contexts.extend(self._to_context(blob) for blob in blobs)
+
+                if current_path.parent == current_path:  # Root reached
+                    break
+                current_path = current_path.parent
+
+        return contexts
+
+    def list_contexts_at_path(self, path: str) -> List[Context]:
+        """List all contexts at a specific path, newest first."""
+        with self.get_session() as session:
+            blobs = session.query(ContextBlob).filter(
+                ContextBlob.path == path
+            ).order_by(ContextBlob.created_at.desc()).all()
+
+            return [self._to_context(blob) for blob in blobs]
+
+    def update_context(self, context_id: int, content: str, summary: Optional[str] = None) -> bool:
+        """Update an existing context.
+
+        Args:
+            context_id: ID of the context to update.
+            content: Replacement content.
+            summary: Replacement summary. ``None`` keeps the stored summary;
+                any string, including ``""``, replaces it.
+
+        Returns:
+            True if the context existed and was updated, False otherwise.
+        """
+        with self.get_session() as session:
+            blob = session.query(ContextBlob).filter(ContextBlob.id == context_id).first()
+            if blob is None:
+                return False
+
+            blob.content = content
+            # Compare against None so an empty string can clear the summary.
+            if summary is not None:
+                blob.summary = summary
+            blob.version += 1
+            blob.updated_at = datetime.utcnow()
+            session.commit()
+            return True
+
+    def delete_context(self, context_id: int) -> bool:
+        """Delete a context by ID; return True if a row was deleted."""
+        with self.get_session() as session:
+            blob = session.query(ContextBlob).filter(ContextBlob.id == context_id).first()
+            if blob is None:
+                return False
+
+            session.delete(blob)
+            session.commit()
+            return True
--- a/hcfs-python/hcfs/core/embeddings.py
+++ b/hcfs-python/hcfs/core/embeddings.py
@@ -0,0 +1,188 @@
+"""
+Embedding Manager - Generate and manage context embeddings.
+"""
+
+import json
+import numpy as np
+from typing import List, Dict, Optional, Tuple
+from sentence_transformers import SentenceTransformer
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+
+from .context_db import Context, ContextDatabase
+
+
+class EmbeddingManager:
+    """
+    Manages embeddings for context blobs and semantic similarity search.
+
+    Embeddings are produced with a SentenceTransformer model and stored as
+    JSON text on the ``ContextBlob`` row they belong to.
+    """
+
+    def __init__(self, context_db: ContextDatabase, model_name: str = "all-MiniLM-L6-v2"):
+        """Load ``model_name`` and prepare the TF-IDF vectorizer.
+
+        Args:
+            context_db: Database holding the contexts to embed and search.
+            model_name: SentenceTransformer model identifier.
+        """
+        self.context_db = context_db
+        self.model_name = model_name
+        self.model = SentenceTransformer(model_name)
+        self.tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)
+        self._tfidf_fitted = False
+
+    @staticmethod
+    def _blob_to_context(blob) -> Context:
+        """Copy the user-facing columns of a ``ContextBlob`` row into a ``Context``."""
+        return Context(
+            id=blob.id,
+            path=blob.path,
+            content=blob.content,
+            summary=blob.summary,
+            author=blob.author,
+            created_at=blob.created_at,
+            updated_at=blob.updated_at,
+            version=blob.version,
+        )
+
+    def generate_embedding(self, text: str) -> np.ndarray:
+        """Generate a normalized embedding for a text."""
+        return self.model.encode(text, normalize_embeddings=True)
+
+    def store_context_with_embedding(self, context: Context) -> int:
+        """Store context, generate its embedding, and return the new context ID."""
+        # Encode first so a model failure does not leave a row without a vector.
+        embedding = self.generate_embedding(context.content)
+        context_id = self.context_db.store_context(context)
+        self._store_embedding(context_id, embedding)
+        return context_id
+
+    def _store_embedding(self, context_id: int, embedding: np.ndarray) -> None:
+        """Store embedding vector (JSON encoded) and model name on the context row."""
+        embedding_json = json.dumps(embedding.tolist())
+
+        with self.context_db.get_session() as session:
+            from .context_db import ContextBlob
+            blob = session.query(ContextBlob).filter(ContextBlob.id == context_id).first()
+            if blob:
+                blob.embedding_model = self.model_name
+                blob.embedding_vector = embedding_json
+                session.commit()
+
+    def semantic_search(self, query: str, path_prefix: Optional[str] = None, top_k: int = 5) -> List[Tuple[Context, float]]:
+        """
+        Perform semantic search for contexts similar to query.
+
+        Args:
+            query: Search query text
+            path_prefix: Optional path prefix to limit search scope
+            top_k: Number of results to return
+
+        Returns:
+            List of (Context, similarity_score) tuples, best match first.
+            Vectors tagged with a different embedding model, or whose shape
+            differs from the query embedding, are skipped.
+        """
+        query_embedding = self.generate_embedding(query)
+
+        with self.context_db.get_session() as session:
+            from .context_db import ContextBlob
+
+            query_filter = session.query(ContextBlob).filter(
+                ContextBlob.embedding_vector.isnot(None)
+            )
+
+            if path_prefix:
+                # autoescape: '_' and '%' in the prefix must match literally,
+                # not act as LIKE wildcards.
+                query_filter = query_filter.filter(
+                    ContextBlob.path.startswith(path_prefix, autoescape=True)
+                )
+
+            similarities = []
+            for blob in query_filter.all():
+                if not blob.embedding_vector:
+                    continue
+                # Vectors from another model live in a different space; rows
+                # without a model tag are still accepted.
+                if blob.embedding_model not in (None, self.model_name):
+                    continue
+
+                stored_embedding = np.array(json.loads(blob.embedding_vector))
+                if stored_embedding.shape != query_embedding.shape:
+                    # cosine_similarity raises on mismatched dimensions.
+                    continue
+
+                similarity = cosine_similarity(
+                    query_embedding.reshape(1, -1),
+                    stored_embedding.reshape(1, -1)
+                )[0][0]
+                similarities.append((self._blob_to_context(blob), float(similarity)))
+
+        # Sort by similarity and return top_k
+        similarities.sort(key=lambda pair: pair[1], reverse=True)
+        return similarities[:top_k]
+
+    def hybrid_search(self, query: str, path_prefix: Optional[str] = None, top_k: int = 5,
+                     semantic_weight: float = 0.7) -> List[Tuple[Context, float]]:
+        """
+        Hybrid search combining semantic similarity and a lexical score.
+
+        The lexical score is TF-IDF cosine similarity, used as an
+        approximation of BM25.
+
+        Args:
+            query: Search query
+            path_prefix: Optional path filter
+            top_k: Number of results
+            semantic_weight: Weight for semantic vs lexical score (0.0-1.0)
+
+        Returns:
+            List of (Context, combined_score) tuples, best match first.
+        """
+        with self.context_db.get_session() as session:
+            from .context_db import ContextBlob
+
+            query_filter = session.query(ContextBlob)
+            if path_prefix:
+                query_filter = query_filter.filter(
+                    ContextBlob.path.startswith(path_prefix, autoescape=True)
+                )
+
+            contexts = [self._blob_to_context(blob) for blob in query_filter.all()]
+
+        if not contexts:
+            return []
+
+        documents = [ctx.content for ctx in contexts]
+
+        # Fit on the documents being scored: a vectorizer fitted during an
+        # earlier call carries the vocabulary/IDF of a different corpus.
+        try:
+            doc_vectors = self.tfidf_vectorizer.fit_transform(documents)
+            query_vector = self.tfidf_vectorizer.transform([query])
+            lexical_scores = cosine_similarity(query_vector, doc_vectors)[0]
+            self._tfidf_fitted = True
+        except ValueError:
+            # Raised e.g. when every document consists of stop words only;
+            # fall back to semantic-only ranking.
+            lexical_scores = np.zeros(len(documents))
+
+        semantic_scores = {
+            ctx.id: score
+            for ctx, score in self.semantic_search(query, path_prefix, len(contexts))
+        }
+
+        combined_results = []
+        for ctx, lexical_score in zip(contexts, lexical_scores):
+            # Contexts without a usable embedding contribute 0.0 semantically.
+            semantic_score = semantic_scores.get(ctx.id, 0.0)
+            combined_score = (semantic_weight * semantic_score +
+                              (1 - semantic_weight) * lexical_score)
+            combined_results.append((ctx, float(combined_score)))
+
+        # Sort and return top results
+        combined_results.sort(key=lambda pair: pair[1], reverse=True)
+        return combined_results[:top_k]
+
+    def get_similar_contexts(self, context_id: int, top_k: int = 5) -> List[Tuple[Context, float]]:
+        """Find contexts similar to a given context.
+
+        The stored content of ``context_id`` is used as the search query.
+        NOTE: the reference context is itself a search candidate, so it can
+        appear in the results when it has a stored embedding.
+        """
+        with self.context_db.get_session() as session:
+            from .context_db import ContextBlob
+            reference_blob = session.query(ContextBlob).filter(ContextBlob.id == context_id).first()
+
+            if not reference_blob or not reference_blob.content:
+                return []
+
+            reference_text = reference_blob.content
+
+        return self.semantic_search(reference_text, top_k=top_k)
--- a/hcfs-python/hcfs/core/embeddings_optimized.py
+++ b/hcfs-python/hcfs/core/embeddings_optimized.py
@@ -0,0 +1,616 @@
+"""
+Optimized Embedding Manager - High-performance vector operations and storage.
+
+This module provides enhanced embedding capabilities including:
+- Vector database integration with SQLite-Vec
+- Optimized batch processing and caching
+- Multiple embedding model support
+- Efficient similarity search with indexing
+- Memory-efficient embedding storage
+"""
+
+import json
+import time
+import numpy as np
+import sqlite3
+from typing import List, Dict, Optional, Tuple, Union, Any
+from dataclasses import dataclass, asdict
+from pathlib import Path
+from sentence_transformers import SentenceTransformer
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+import threading
+from contextlib import contextmanager
+from functools import lru_cache
+import logging
+
+from .context_db import Context, ContextDatabase
+
+logger = logging.getLogger(__name__)
+
+@dataclass
+class EmbeddingModel:
+    """Configuration for embedding models."""
+    # Identifier stored next to each vector and used in cache keys.
+    name: str
+    # Value passed to SentenceTransformer() when the model is loaded.
+    model_path: str
+    # Length of the vectors the model is declared to produce.
+    dimension: int
+    max_tokens: int = 512
+    # Forwarded as normalize_embeddings when encoding.
+    normalize: bool = True
+
+@dataclass 
+class VectorSearchResult:
+    """Result from vector search operations."""
+    context_id: int
+    # Similarity score; hybrid search overwrites it with the combined score.
+    score: float
+    # Filled in only when the search is asked to include contexts.
+    context: Optional[Context] = None
+    # None until a search attaches score details.
+    metadata: Optional[Dict[str, Any]] = None
+
+class VectorCache:
+    """High-performance LRU cache for embeddings.
+
+    Entries expire ``ttl_seconds`` after they were stored; when the cache is
+    full the least recently accessed entry is evicted. All public methods
+    take ``self.lock``.
+    """
+
+    def __init__(self, max_size: int = 5000, ttl_seconds: int = 3600):
+        """Create an empty cache.
+
+        Args:
+            max_size: Maximum number of entries; a value <= 0 disables caching.
+            ttl_seconds: Lifetime of an entry, counted from when it was stored.
+        """
+        self.max_size = max_size
+        self.ttl_seconds = ttl_seconds
+        # key -> (embedding, time the entry was stored)
+        self.cache: Dict[str, Tuple[np.ndarray, float]] = {}
+        # key -> time of the last put/get, used to pick the eviction victim
+        self.access_times: Dict[str, float] = {}
+        self.lock = threading.RLock()
+        # Lookup counters backing the hit rate reported by stats().
+        self._hits = 0
+        self._requests = 0
+
+    def get(self, key: str) -> Optional[np.ndarray]:
+        """Get embedding from cache.
+
+        Returns:
+            A copy of the cached embedding, or None if the key is missing or
+            its entry has expired (expired entries are removed).
+        """
+        with self.lock:
+            self._requests += 1
+
+            entry = self.cache.get(key)
+            if entry is None:
+                return None
+
+            embedding, created_time = entry
+            current_time = time.time()
+
+            if current_time - created_time >= self.ttl_seconds:
+                # Expired
+                del self.cache[key]
+                self.access_times.pop(key, None)
+                return None
+
+            self.access_times[key] = current_time
+            self._hits += 1
+            # Copy so callers cannot mutate the cached array.
+            return embedding.copy()
+
+    def put(self, key: str, embedding: np.ndarray) -> None:
+        """Store a copy of ``embedding`` under ``key``, evicting if needed."""
+        if self.max_size <= 0:
+            return
+
+        with self.lock:
+            current_time = time.time()
+
+            # Overwriting an existing key does not grow the cache, so only a
+            # genuinely new key may push another entry out.
+            if key not in self.cache and len(self.cache) >= self.max_size:
+                self._evict_lru()
+
+            self.cache[key] = (embedding.copy(), current_time)
+            self.access_times[key] = current_time
+
+    def _evict_lru(self) -> None:
+        """Evict least recently used item (caller must hold the lock)."""
+        if not self.access_times:
+            return
+
+        lru_key = min(self.access_times, key=self.access_times.get)
+        self.cache.pop(lru_key, None)
+        del self.access_times[lru_key]
+
+    def clear(self) -> None:
+        """Remove all entries; the hit/request counters are kept."""
+        with self.lock:
+            self.cache.clear()
+            self.access_times.clear()
+
+    def stats(self) -> Dict[str, Any]:
+        """Get cache statistics.
+
+        Returns:
+            Dict with ``size``, ``max_size``, ``hit_rate`` (hits divided by
+            get() calls, 0.0 before the first lookup) and ``ttl_seconds``.
+        """
+        with self.lock:
+            hit_rate = self._hits / self._requests if self._requests else 0.0
+            return {
+                "size": len(self.cache),
+                "max_size": self.max_size,
+                "hit_rate": hit_rate,
+                "ttl_seconds": self.ttl_seconds
+            }
+
+class OptimizedEmbeddingManager:
+    """
+    High-performance embedding manager with vector database capabilities.
+    """
+    
+    # Predefined embedding models, keyed by the short alias accepted as the
+    # ``model_name`` constructor argument. Each entry is
+    # EmbeddingModel(name, model_path, dimension); max_tokens and normalize
+    # keep their dataclass defaults.
+    MODELS = {
+        "mini": EmbeddingModel("all-MiniLM-L6-v2", "all-MiniLM-L6-v2", 384),
+        "base": EmbeddingModel("all-MiniLM-L12-v2", "all-MiniLM-L12-v2", 384), 
+        "large": EmbeddingModel("all-mpnet-base-v2", "all-mpnet-base-v2", 768),
+        "multilingual": EmbeddingModel("paraphrase-multilingual-MiniLM-L12-v2", 
+                                     "paraphrase-multilingual-MiniLM-L12-v2", 384)
+    }
+    
+    def __init__(self, 
+                 context_db: ContextDatabase,
+                 model_name: str = "mini",
+                 vector_db_path: Optional[str] = None,
+                 cache_size: int = 5000,
+                 batch_size: int = 32):
+        """Set up caching, the vector database and the TF-IDF vectorizer.
+
+        The embedding model itself is loaded lazily on first use.
+
+        Args:
+            context_db: Database holding the contexts to embed and search.
+            model_name: Key into ``MODELS``; an unknown key falls back to
+                ``"mini"`` and logs a warning.
+            vector_db_path: SQLite file for vectors; defaults to
+                ``"hcfs_vectors.db"``.
+            cache_size: Maximum number of embeddings kept in the in-memory cache.
+            batch_size: Number of texts encoded per model call in batch mode.
+        """
+        self.context_db = context_db
+
+        # Keep the fallback, but make it visible: a typo in model_name would
+        # otherwise silently select a different model.
+        if model_name not in self.MODELS:
+            logger.warning(
+                "Unknown embedding model %r; falling back to 'mini' (available: %s)",
+                model_name, ", ".join(self.MODELS)
+            )
+        self.model_config = self.MODELS.get(model_name, self.MODELS["mini"])
+
+        self.model = None  # Lazy loading
+        self._model_lock = threading.RLock()
+        self.vector_cache = VectorCache(cache_size)
+        self.batch_size = batch_size
+
+        # Vector database setup
+        self.vector_db_path = vector_db_path or "hcfs_vectors.db"
+        self._init_vector_db()
+
+        # TF-IDF for hybrid search
+        self.tfidf_vectorizer = TfidfVectorizer(
+            stop_words='english', 
+            max_features=5000,
+            ngram_range=(1, 2),
+            min_df=2
+        )
+        self._tfidf_fitted = False
+
+        logger.info(f"Initialized OptimizedEmbeddingManager with model: {self.model_config.name}")
+    
+    def _get_model(self) -> SentenceTransformer:
+        """Return the embedding model, loading it on first use."""
+        if self.model is not None:
+            return self.model
+
+        with self._model_lock:
+            # Check again under the lock: another thread may have finished
+            # loading while this one was waiting.
+            if self.model is None:
+                logger.info(f"Loading embedding model: {self.model_config.model_path}")
+                self.model = SentenceTransformer(self.model_config.model_path)
+        return self.model
+    
+    def _init_vector_db(self) -> None:
+        """Create the ``context_vectors`` table and its index if missing.
+
+        Vectors are stored as raw BLOBs, one row per ``context_id``. The
+        connection is closed even when a statement fails.
+        """
+        conn = sqlite3.connect(self.vector_db_path)
+        try:
+            cursor = conn.cursor()
+
+            # Create vectors table
+            cursor.execute('''
+                CREATE TABLE IF NOT EXISTS context_vectors (
+                    context_id INTEGER PRIMARY KEY,
+                    model_name TEXT NOT NULL,
+                    embedding_dimension INTEGER NOT NULL,
+                    vector_data BLOB NOT NULL,
+                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                )
+            ''')
+
+            # Create index for fast lookups
+            cursor.execute('''
+                CREATE INDEX IF NOT EXISTS idx_context_vectors_model 
+                ON context_vectors(model_name, context_id)
+            ''')
+
+            conn.commit()
+        finally:
+            # Without this, an error in the DDL above would leak the handle.
+            conn.close()
+
+        logger.info(f"Vector database initialized: {self.vector_db_path}")
+    
+    @contextmanager
+    def _get_vector_db(self):
+        """Yield a new SQLite connection to the vector database and close it on exit."""
+        connection = sqlite3.connect(self.vector_db_path)
+        try:
+            yield connection
+        finally:
+            # Reached on normal exit and when the ``with`` body raises.
+            connection.close()
+    
+    def generate_embedding(self, text: str, use_cache: bool = True) -> np.ndarray:
+        """Return the embedding of ``text``, consulting the cache when allowed.
+
+        Args:
+            text: Text to embed.
+            use_cache: When True, return a cached vector if one exists and
+                cache a newly computed one.
+        """
+        # Key combines the model name with the text's hash.
+        cache_key = f"{self.model_config.name}:{hash(text)}"
+
+        hit = self.vector_cache.get(cache_key) if use_cache else None
+        if hit is not None:
+            return hit
+
+        vector = self._get_model().encode(
+            text,
+            normalize_embeddings=self.model_config.normalize,
+            show_progress_bar=False,
+        )
+
+        if use_cache:
+            self.vector_cache.put(cache_key, vector)
+        return vector
+    
+    def generate_embeddings_batch(self, texts: List[str], use_cache: bool = True) -> List[np.ndarray]:
+        """Embed many texts, encoding only those that are not cached.
+
+        Args:
+            texts: Texts to embed.
+            use_cache: When True, reuse cached vectors and cache new ones.
+
+        Returns:
+            One embedding per input text, in input order.
+        """
+        if not texts:
+            return []
+
+        results = [None] * len(texts)
+
+        # (original position, text) pairs that still need the model.
+        pending = []
+        if use_cache:
+            for position, text in enumerate(texts):
+                hit = self.vector_cache.get(f"{self.model_config.name}:{hash(text)}")
+                if hit is None:
+                    pending.append((position, text))
+                else:
+                    results[position] = hit
+        else:
+            pending = list(enumerate(texts))
+
+        if not pending:
+            return results
+
+        encoder = self._get_model()
+
+        # Feed the model slices of at most batch_size texts.
+        for offset in range(0, len(pending), self.batch_size):
+            chunk = pending[offset:offset + self.batch_size]
+            vectors = encoder.encode(
+                [text for _, text in chunk],
+                normalize_embeddings=self.model_config.normalize,
+                show_progress_bar=False,
+                batch_size=self.batch_size
+            )
+
+            for (position, text), vector in zip(chunk, vectors):
+                results[position] = vector
+                if use_cache:
+                    self.vector_cache.put(f"{self.model_config.name}:{hash(text)}", vector)
+
+        return results
+    
+    def store_embedding(self, context_id: int, embedding: np.ndarray) -> None:
+        """Store embedding in vector database.
+
+        The vector is flattened and saved as float32 bytes together with its
+        element count, replacing any row already stored for ``context_id``.
+
+        Args:
+            context_id: ID of the context the embedding belongs to.
+            embedding: Embedding vector; any shape is flattened to 1-D.
+        """
+        # Flatten first so the recorded dimension always equals the number of
+        # stored floats. With shape[0], a (1, d) array was recorded as
+        # dimension 1 and could not be reshaped when read back.
+        vector = np.asarray(embedding, dtype=np.float32).ravel()
+
+        with self._get_vector_db() as conn:
+            cursor = conn.cursor()
+
+            # NOTE(review): INSERT OR REPLACE removes the old row first, so
+            # created_at falls back to its column default on overwrite.
+            cursor.execute('''
+                INSERT OR REPLACE INTO context_vectors 
+                (context_id, model_name, embedding_dimension, vector_data, updated_at)
+                VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP)
+            ''', (context_id, self.model_config.name, int(vector.size), vector.tobytes()))
+
+            conn.commit()
+    
+    def store_embeddings_batch(self, context_embeddings: List[Tuple[int, np.ndarray]]) -> None:
+        """Store multiple embeddings efficiently.
+
+        All rows are written with one ``executemany`` call and one commit.
+
+        Args:
+            context_embeddings: ``(context_id, embedding)`` pairs; each
+                embedding is flattened and stored as float32 bytes.
+        """
+        if not context_embeddings:
+            return
+
+        rows = []
+        for context_id, embedding in context_embeddings:
+            # Flatten so the recorded dimension matches the stored byte
+            # length even for (1, d)-shaped input.
+            vector = np.asarray(embedding, dtype=np.float32).ravel()
+            rows.append(
+                (context_id, self.model_config.name, int(vector.size), vector.tobytes())
+            )
+
+        with self._get_vector_db() as conn:
+            cursor = conn.cursor()
+
+            cursor.executemany('''
+                INSERT OR REPLACE INTO context_vectors 
+                (context_id, model_name, embedding_dimension, vector_data, updated_at)
+                VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP)
+            ''', rows)
+
+            conn.commit()
+
+        logger.info(f"Stored {len(context_embeddings)} embeddings in batch")
+    
+    def get_embedding(self, context_id: int) -> Optional[np.ndarray]:
+        """Load the stored vector for ``context_id`` under the current model.
+
+        Returns:
+            The embedding as a float32 array, or None when no row matches
+            both the context ID and the current model name.
+        """
+        with self._get_vector_db() as conn:
+            row = conn.cursor().execute('''
+                SELECT vector_data, embedding_dimension FROM context_vectors 
+                WHERE context_id = ? AND model_name = ?
+            ''', (context_id, self.model_config.name)).fetchone()
+
+        if not row:
+            return None
+
+        raw_bytes, dim = row
+        # Reinterpret the BLOB as float32 values of the recorded length.
+        return np.frombuffer(raw_bytes, dtype=np.float32).reshape(dim)
+    
+    def vector_similarity_search(self, 
+                                query_embedding: np.ndarray, 
+                                context_ids: Optional[List[int]] = None,
+                                top_k: int = 10,
+                                min_similarity: float = 0.0) -> List[VectorSearchResult]:
+        """Rank stored vectors of the current model by cosine similarity.
+
+        Args:
+            query_embedding: Query vector.
+            context_ids: Restrict the search to these context IDs. ``None``
+                searches every stored vector; an empty list yields no results.
+            top_k: Maximum number of results to return.
+            min_similarity: Results scoring below this value are dropped.
+
+        Returns:
+            Up to ``top_k`` results, best first, without contexts attached.
+            Stored vectors whose size differs from the query are skipped.
+        """
+        # An explicit but empty restriction means "nothing qualifies"; it
+        # must not fall through to an unrestricted search.
+        if context_ids is not None and len(context_ids) == 0:
+            return []
+
+        select_sql = '''
+            SELECT context_id, vector_data, embedding_dimension 
+            FROM context_vectors 
+            WHERE model_name = ?
+        '''
+        model_name = self.model_config.name
+        rows = []
+
+        with self._get_vector_db() as conn:
+            cursor = conn.cursor()
+
+            if context_ids is None:
+                cursor.execute(select_sql, [model_name])
+                rows = cursor.fetchall()
+            else:
+                # SQLite caps the number of bound parameters per statement,
+                # so large ID lists are queried in slices.
+                unique_ids = list(dict.fromkeys(context_ids))
+                chunk_size = 500
+                for start in range(0, len(unique_ids), chunk_size):
+                    chunk = unique_ids[start:start + chunk_size]
+                    placeholders = ','.join(['?'] * len(chunk))
+                    cursor.execute(
+                        f"{select_sql} AND context_id IN ({placeholders})",
+                        [model_name] + chunk
+                    )
+                    rows.extend(cursor.fetchall())
+
+        if not rows:
+            return []
+
+        query_vector = np.asarray(query_embedding).reshape(1, -1)
+        expected_dim = query_vector.shape[1]
+
+        candidate_ids = []
+        candidate_vectors = []
+        for context_id, vector_bytes, dimension in rows:
+            stored = np.frombuffer(vector_bytes, dtype=np.float32)
+            if dimension != expected_dim or stored.size != expected_dim:
+                # A row that does not match the query cannot be compared.
+                logger.warning(
+                    "Skipping vector for context %s: dimension %s does not match query dimension %s",
+                    context_id, dimension, expected_dim
+                )
+                continue
+            candidate_ids.append(context_id)
+            candidate_vectors.append(stored)
+
+        if not candidate_vectors:
+            return []
+
+        # One matrix operation instead of one cosine_similarity call per row.
+        scores = cosine_similarity(query_vector, np.vstack(candidate_vectors))[0]
+
+        similarities = [
+            VectorSearchResult(context_id=context_id, score=float(score))
+            for context_id, score in zip(candidate_ids, scores)
+            if score >= min_similarity
+        ]
+
+        # Sort by similarity and return top_k
+        similarities.sort(key=lambda result: result.score, reverse=True)
+        return similarities[:top_k]
+    
+    def semantic_search_optimized(self, 
+                                query: str, 
+                                path_prefix: Optional[str] = None, 
+                                top_k: int = 5,
+                                include_contexts: bool = True) -> List[VectorSearchResult]:
+        """High-performance semantic search.
+
+        Args:
+            query: Search query text.
+            path_prefix: Only contexts whose path starts with this prefix are
+                searched; the prefix is matched literally.
+            top_k: Maximum number of results.
+            include_contexts: When True, attach the ``Context`` to each result
+                (left as None if the context row no longer exists).
+
+        Returns:
+            Results ordered by descending similarity.
+        """
+        from .context_db import ContextBlob
+
+        # Generate query embedding
+        query_embedding = self.generate_embedding(query)
+
+        # Get relevant context IDs based on path filter
+        context_ids = None
+        if path_prefix:
+            with self.context_db.get_session() as session:
+                # autoescape: without it '_' and '%' in the prefix act as
+                # LIKE wildcards, so "/my_app" would also match "/myXapp".
+                rows = session.query(ContextBlob.id).filter(
+                    ContextBlob.path.startswith(path_prefix, autoescape=True)
+                ).all()
+                context_ids = [row.id for row in rows]
+
+            if not context_ids:
+                return []
+
+        # Perform vector search
+        results = self.vector_similarity_search(
+            query_embedding, 
+            context_ids=context_ids,
+            top_k=top_k
+        )
+
+        # Populate with context data if requested
+        if include_contexts and results:
+            result_ids = [result.context_id for result in results]
+            with self.context_db.get_session() as session:
+                blobs = session.query(ContextBlob).filter(
+                    ContextBlob.id.in_(result_ids)
+                ).all()
+
+                context_map = {
+                    blob.id: Context(
+                        id=blob.id,
+                        path=blob.path,
+                        content=blob.content,
+                        summary=blob.summary,
+                        author=blob.author,
+                        created_at=blob.created_at,
+                        updated_at=blob.updated_at,
+                        version=blob.version
+                    )
+                    for blob in blobs
+                }
+
+            # Add contexts to results
+            for result in results:
+                result.context = context_map.get(result.context_id)
+
+        return results
+    
+    def hybrid_search_optimized(self, 
+                               query: str, 
+                               path_prefix: Optional[str] = None, 
+                               top_k: int = 5,
+                               semantic_weight: float = 0.7,
+                               rerank_top_n: int = 50) -> List[VectorSearchResult]:
+        """Optimized hybrid search with two-stage ranking.
+
+        Stage 1 takes the ``rerank_top_n`` best semantic matches. Stage 2
+        re-scores them with a lexical score (TF-IDF cosine similarity, used
+        as a stand-in for BM25) and blends both scores.
+
+        Args:
+            query: Search query text.
+            path_prefix: Optional path prefix limiting the search scope.
+            top_k: Number of results to return.
+            semantic_weight: Weight of the semantic score (0.0-1.0); the
+                lexical score gets ``1 - semantic_weight``.
+            rerank_top_n: Size of the candidate set taken from stage 1.
+
+        Returns:
+            Up to ``top_k`` results sorted by combined score. Re-ranked
+            results carry the individual scores in ``metadata``. If lexical
+            scoring fails, the semantic ranking is returned unchanged.
+        """
+        # Stage 1: Fast semantic search to get candidate set
+        semantic_results = self.semantic_search_optimized(
+            query, path_prefix, rerank_top_n, include_contexts=True
+        )
+
+        # Nothing to re-rank with fewer than two candidates.
+        if len(semantic_results) < 2:
+            return semantic_results[:top_k]
+
+        # Stage 2: Re-rank with lexical scores
+        rankable = [result for result in semantic_results if result.context]
+        if not rankable:
+            return semantic_results[:top_k]
+
+        documents = [result.context.content for result in rankable]
+
+        try:
+            # Fit on the current candidates: a vectorizer fitted during an
+            # earlier query carries the vocabulary of unrelated documents.
+            doc_vectors = self.tfidf_vectorizer.fit_transform(documents)
+            self._tfidf_fitted = True
+            query_vector = self.tfidf_vectorizer.transform([query])
+            lexical_scores = cosine_similarity(query_vector, doc_vectors)[0]
+        except ValueError as e:
+            # Raised by scikit-learn when no vocabulary survives pruning.
+            logger.warning(f"BM25 scoring failed: {e}, using semantic only")
+            return semantic_results[:top_k]
+
+        # Key lexical scores by context ID. Pairing by list position goes
+        # wrong as soon as one candidate has no context, because
+        # ``documents`` is then shorter than ``semantic_results``.
+        lexical_by_id = {
+            result.context_id: float(score)
+            for result, score in zip(rankable, lexical_scores)
+        }
+
+        # Combine scores
+        for result in semantic_results:
+            semantic_score = result.score
+            # Candidates without a context have no text to score.
+            bm25_score = lexical_by_id.get(result.context_id, 0.0)
+
+            combined_score = (semantic_weight * semantic_score + 
+                            (1 - semantic_weight) * bm25_score)
+
+            result.score = float(combined_score)
+            result.metadata = {
+                "semantic_score": float(semantic_score),
+                "bm25_score": float(bm25_score),
+                "semantic_weight": semantic_weight
+            }
+
+        # Re-sort by combined score
+        semantic_results.sort(key=lambda result: result.score, reverse=True)
+        return semantic_results[:top_k]
+    
+    def build_embeddings_index(self, batch_size: int = 100) -> Dict[str, Any]:
+        """Generate and store embeddings for every context that lacks one.
+
+        A context counts as "missing" when ``context_vectors`` has no row for
+        its id under the current model name. Missing contexts are embedded in
+        batches via ``generate_embeddings_batch`` (called with
+        ``use_cache=False``) and persisted via ``store_embeddings_batch``.
+
+        Args:
+            batch_size: Number of contexts embedded per batch; must be >= 1.
+
+        Returns:
+            A dict with ``total_processed``, ``processing_time`` (seconds),
+            ``embeddings_per_second``, ``model_used`` and
+            ``embedding_dimension``. When nothing needs embedding, the
+            counters are 0 and a ``message`` key is included.
+
+        Raises:
+            ValueError: If ``batch_size`` is less than 1.
+        """
+        if batch_size < 1:
+            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
+
+        start_time = time.time()
+
+        # Imported here, as in the original code, rather than at module level.
+        from .context_db import ContextBlob
+
+        # Ids that already have an embedding for the current model.
+        with self._get_vector_db() as vector_conn:
+            vector_cursor = vector_conn.cursor()
+            vector_cursor.execute('''
+                SELECT context_id FROM context_vectors 
+                WHERE model_name = ?
+            ''', (self.model_config.name,))
+
+            existing_ids = {row[0] for row in vector_cursor.fetchall()}
+
+        # Select only the two columns needed and copy them out as plain
+        # tuples while the session is open, so nothing below touches ORM
+        # instances after the session has exited.
+        with self.context_db.get_session() as session:
+            missing = [
+                (blob_id, content)
+                for blob_id, content in session.query(ContextBlob.id, ContextBlob.content).all()
+                if blob_id not in existing_ids
+            ]
+
+        if not missing:
+            return {
+                "total_processed": 0,
+                "processing_time": 0,
+                "embeddings_per_second": 0,
+                "model_used": self.model_config.name,
+                "embedding_dimension": self.model_config.dimension,
+                "message": "All contexts already have embeddings"
+            }
+
+        total_missing = len(missing)
+        logger.info(f"Building embeddings for {total_missing} contexts")
+
+        # Process in batches
+        total_processed = 0
+        for batch_start in range(0, total_missing, batch_size):
+            batch = missing[batch_start:batch_start + batch_size]
+
+            # Generate embeddings for batch
+            texts = [content for _, content in batch]
+            embeddings = self.generate_embeddings_batch(texts, use_cache=False)
+
+            # Store embeddings keyed by context id
+            self.store_embeddings_batch([
+                (blob_id, embedding)
+                for (blob_id, _), embedding in zip(batch, embeddings)
+            ])
+
+            total_processed += len(batch)
+            logger.info(f"Processed {total_processed}/{total_missing} contexts")
+
+        processing_time = time.time() - start_time
+        embeddings_per_second = total_processed / processing_time if processing_time > 0 else 0
+
+        return {
+            "total_processed": total_processed,
+            "processing_time": processing_time,
+            "embeddings_per_second": embeddings_per_second,
+            "model_used": self.model_config.name,
+            "embedding_dimension": self.model_config.dimension
+        }
+    
+    def get_statistics(self) -> Dict[str, Any]:
+        """Return a snapshot of embedding storage, cache and model settings.
+
+        Returns:
+            A dict with:
+              * ``database_stats``: ``total_embeddings``, ``unique_models``,
+                ``average_dimension`` and per-model ``model_counts`` read from
+                the ``context_vectors`` table;
+              * ``cache_stats``: whatever ``self.vector_cache.stats()`` returns;
+              * ``current_model``: ``self.model_config`` converted with
+                ``asdict``;
+              * ``vector_db_path`` and ``batch_size`` copied from the manager.
+        """
+        with self._get_vector_db() as conn:
+            cursor = conn.cursor()
+
+            cursor.execute('''
+                SELECT 
+                    COUNT(*) as total_embeddings,
+                    COUNT(DISTINCT model_name) as unique_models,
+                    AVG(embedding_dimension) as avg_dimension
+                FROM context_vectors
+            ''')
+
+            # An aggregate query without GROUP BY always yields one row, but
+            # keep a fallback in case the connection returns none.
+            total_embeddings, unique_models, avg_dimension = (
+                cursor.fetchone() or (0, 0, None)
+            )
+
+            cursor.execute('''
+                SELECT model_name, COUNT(*) as count
+                FROM context_vectors
+                GROUP BY model_name
+            ''')
+
+            model_counts = dict(cursor.fetchall())
+
+        return {
+            "database_stats": {
+                "total_embeddings": total_embeddings or 0,
+                "unique_models": unique_models or 0,
+                # AVG() is NULL (None) on an empty table; report 0 instead.
+                "average_dimension": avg_dimension or 0,
+                "model_counts": model_counts
+            },
+            "cache_stats": self.vector_cache.stats(),
+            "current_model": asdict(self.model_config),
+            "vector_db_path": self.vector_db_path,
+            "batch_size": self.batch_size
+        }
+    
+    def cleanup_old_embeddings(self, days_old: int = 30) -> int:
+        """Delete stale embeddings whose context no longer exists.
+
+        A row of ``context_vectors`` is removed when its ``updated_at`` is
+        more than ``days_old`` days in the past *and* its ``context_id`` is
+        not the id of any ``ContextBlob``.
+
+        Live context ids are read through ``self.context_db`` rather than via
+        a SQL subquery on the vector connection, so the check does not depend
+        on ``context_blobs`` being reachable from the vector database
+        connection.
+
+        Args:
+            days_old: Minimum age, in days, of an embedding to be considered.
+
+        Returns:
+            Number of embedding rows deleted.
+
+        Raises:
+            ValueError: If ``days_old`` is negative or not convertible to int.
+        """
+        days = int(days_old)
+        if days < 0:
+            raise ValueError(f"days_old must be non-negative, got {days_old}")
+
+        # Passed as a bound parameter instead of being formatted into the SQL.
+        age_modifier = f"-{days} days"
+
+        from .context_db import ContextBlob
+
+        with self.context_db.get_session() as session:
+            live_ids = {row[0] for row in session.query(ContextBlob.id).all()}
+
+        with self._get_vector_db() as conn:
+            cursor = conn.cursor()
+
+            cursor.execute('''
+                SELECT DISTINCT context_id FROM context_vectors
+                WHERE updated_at < datetime('now', ?)
+            ''', (age_modifier,))
+            orphan_ids = [
+                row[0] for row in cursor.fetchall() if row[0] not in live_ids
+            ]
+
+            deleted_count = 0
+            if orphan_ids:
+                # executemany avoids building an arbitrarily long IN (...)
+                # list; sqlite3 sums the affected rows into rowcount.
+                cursor.executemany('''
+                    DELETE FROM context_vectors
+                    WHERE context_id = ?
+                    AND updated_at < datetime('now', ?)
+                ''', [(context_id, age_modifier) for context_id in orphan_ids])
+                deleted_count = cursor.rowcount
+
+            conn.commit()
+
+        logger.info(f"Cleaned up {deleted_count} old embeddings")
+        return deleted_count
--- a/hcfs-python/hcfs/core/embeddings_trio.py
+++ b/hcfs-python/hcfs/core/embeddings_trio.py
@@ -0,0 +1,136 @@
+"""
+Trio-compatible wrapper for OptimizedEmbeddingManager.
+
+This module provides async compatibility for the optimized embedding system
+to work with FUSE filesystem operations that require Trio async context.
+"""
+
+import trio
+from typing import List, Dict, Optional, Tuple, Any
+
+from .embeddings_optimized import OptimizedEmbeddingManager, VectorSearchResult
+from .context_db import Context
+
+
+class TrioOptimizedEmbeddingManager:
+    """
+    Async facade over an OptimizedEmbeddingManager for Trio code.
+
+    Every coroutine below forwards its arguments positionally to the method
+    of the same name on the wrapped synchronous manager, executing it through
+    ``trio.to_thread.run_sync``.
+    """
+
+    def __init__(self, sync_embedding_manager: OptimizedEmbeddingManager):
+        # The wrapped synchronous manager that does the actual work.
+        self.sync_manager = sync_embedding_manager
+
+    async def _offload(self, func, *args):
+        """Await ``func(*args)`` via ``trio.to_thread.run_sync`` and return its result."""
+        return await trio.to_thread.run_sync(func, *args)
+
+    async def generate_embedding(self, text: str, use_cache: bool = True) -> 'np.ndarray':
+        """Async counterpart of ``sync_manager.generate_embedding``."""
+        target = self.sync_manager.generate_embedding
+        return await self._offload(target, text, use_cache)
+
+    async def generate_embeddings_batch(self, texts: List[str], use_cache: bool = True) -> List['np.ndarray']:
+        """Async counterpart of ``sync_manager.generate_embeddings_batch``."""
+        target = self.sync_manager.generate_embeddings_batch
+        return await self._offload(target, texts, use_cache)
+
+    async def store_embedding(self, context_id: int, embedding: 'np.ndarray') -> None:
+        """Async counterpart of ``sync_manager.store_embedding``; returns nothing."""
+        target = self.sync_manager.store_embedding
+        await self._offload(target, context_id, embedding)
+
+    async def store_embeddings_batch(self, context_embeddings: List[Tuple[int, 'np.ndarray']]) -> None:
+        """Async counterpart of ``sync_manager.store_embeddings_batch``; returns nothing."""
+        target = self.sync_manager.store_embeddings_batch
+        await self._offload(target, context_embeddings)
+
+    async def get_embedding(self, context_id: int) -> Optional['np.ndarray']:
+        """Async counterpart of ``sync_manager.get_embedding``."""
+        target = self.sync_manager.get_embedding
+        return await self._offload(target, context_id)
+
+    async def semantic_search_optimized(
+        self,
+        query: str,
+        path_prefix: str = None,
+        top_k: int = 5,
+        include_contexts: bool = True,
+    ) -> List[VectorSearchResult]:
+        """Async counterpart of ``sync_manager.semantic_search_optimized``."""
+        target = self.sync_manager.semantic_search_optimized
+        return await self._offload(
+            target, query, path_prefix, top_k, include_contexts
+        )
+
+    async def hybrid_search_optimized(
+        self,
+        query: str,
+        path_prefix: str = None,
+        top_k: int = 5,
+        semantic_weight: float = 0.7,
+        rerank_top_n: int = 50,
+    ) -> List[VectorSearchResult]:
+        """Async counterpart of ``sync_manager.hybrid_search_optimized``."""
+        target = self.sync_manager.hybrid_search_optimized
+        return await self._offload(
+            target, query, path_prefix, top_k, semantic_weight, rerank_top_n
+        )
+
+    async def vector_similarity_search(
+        self,
+        query_embedding: 'np.ndarray',
+        context_ids: Optional[List[int]] = None,
+        top_k: int = 10,
+        min_similarity: float = 0.0,
+    ) -> List[VectorSearchResult]:
+        """Async counterpart of ``sync_manager.vector_similarity_search``."""
+        target = self.sync_manager.vector_similarity_search
+        return await self._offload(
+            target, query_embedding, context_ids, top_k, min_similarity
+        )
+
+    async def build_embeddings_index(self, batch_size: int = 100) -> Dict[str, Any]:
+        """Async counterpart of ``sync_manager.build_embeddings_index``."""
+        target = self.sync_manager.build_embeddings_index
+        return await self._offload(target, batch_size)
+
+    async def get_statistics(self) -> Dict[str, Any]:
+        """Async counterpart of ``sync_manager.get_statistics``."""
+        return await self._offload(self.sync_manager.get_statistics)
+
+    async def cleanup_old_embeddings(self, days_old: int = 30) -> int:
+        """Async counterpart of ``sync_manager.cleanup_old_embeddings``."""
+        target = self.sync_manager.cleanup_old_embeddings
+        return await self._offload(target, days_old)
+
+    # Read-only pass-throughs to attributes of the wrapped manager.
+    @property
+    def model_config(self):
+        """The wrapped manager's ``model_config``."""
+        return self.sync_manager.model_config
+
+    @property
+    def vector_cache(self):
+        """The wrapped manager's ``vector_cache``."""
+        return self.sync_manager.vector_cache
+
+    @property
+    def batch_size(self):
+        """The wrapped manager's ``batch_size``."""
+        return self.sync_manager.batch_size
--- a/hcfs-python/hcfs/core/filesystem.py
+++ b/hcfs-python/hcfs/core/filesystem.py
@@ -0,0 +1,179 @@
+"""
+HCFS Filesystem - FUSE-based virtual filesystem layer.
+"""
+
+import os
+import stat
+import errno
+import time
+from typing import Dict, Optional
+from pathlib import Path
+
+import pyfuse3
+from pyfuse3 import FUSEError
+
+from .context_db import ContextDatabase, Context
+
+
+class HCFSFilesystem(pyfuse3.Operations):
+    """
+    HCFS FUSE filesystem implementation.
+
+    Maps directory navigation to context scope and provides
+    virtual files for context access. Every path that is not one of the
+    three virtual file names is reported as a directory, and each directory
+    lists the three virtual files:
+
+    * ``.context``      - contexts returned by ``get_context_by_path``
+    * ``.context_list`` - one summary line per context at the path
+    * ``.context_push`` - write-only entry point that stores a new context
+
+    NOTE(review): the ``context_db`` calls in ``read``/``write`` are made
+    synchronously inside async handlers; confirm that is acceptable for the
+    expected database latency.
+    """
+
+    def __init__(self, context_db: ContextDatabase, mount_point: str):
+        super().__init__()
+        self.context_db = context_db
+        self.mount_point = mount_point
+        # Inode 1 is the root directory; further inodes are handed out
+        # sequentially by _get_inode.
+        self._inode_counter = 1
+        self._inode_to_path: Dict[int, str] = {1: "/"}  # Root inode
+        self._path_to_inode: Dict[str, int] = {"/": 1}
+
+        # Virtual files
+        self.CONTEXT_FILE = ".context"
+        self.CONTEXT_LIST_FILE = ".context_list"
+        self.CONTEXT_PUSH_FILE = ".context_push"
+
+    def _get_inode(self, path: str) -> int:
+        """Return the inode for ``path``, allocating a new one on first use."""
+        inode = self._path_to_inode.get(path)
+        if inode is not None:
+            return inode
+
+        self._inode_counter += 1
+        inode = self._inode_counter
+        self._inode_to_path[inode] = path
+        self._path_to_inode[path] = inode
+        return inode
+
+    def _get_path(self, inode: int) -> str:
+        """Return the path registered for ``inode``; unknown inodes map to "/"."""
+        return self._inode_to_path.get(inode, "/")
+
+    def _is_virtual_file(self, path: str) -> bool:
+        """Return True if the last component of ``path`` is a virtual file name."""
+        basename = os.path.basename(path)
+        return basename in (self.CONTEXT_FILE, self.CONTEXT_LIST_FILE, self.CONTEXT_PUSH_FILE)
+
+    def _child_path(self, parent_inode: int, name: bytes) -> str:
+        """Build the normalized path of entry ``name`` under ``parent_inode``.
+
+        Raises:
+            FUSEError: ``EINVAL`` if ``name`` is not valid UTF-8, so a bad
+                file name produces an errno instead of an unhandled
+                exception in the request handler.
+        """
+        try:
+            decoded = name.decode('utf-8')
+        except UnicodeDecodeError:
+            raise FUSEError(errno.EINVAL) from None
+
+        # normpath resolves "." and ".." components; it may keep exactly two
+        # leading slashes, so collapse those explicitly.
+        child_path = os.path.normpath(os.path.join(self._get_path(parent_inode), decoded))
+        if child_path.startswith("//"):
+            child_path = child_path[1:]
+        return child_path
+
+    async def getattr(self, inode: int, ctx=None) -> pyfuse3.EntryAttributes:
+        """Return attributes for ``inode``.
+
+        Virtual files are regular files (mode 0644) with a placeholder size
+        of 1024 bytes; everything else is a directory (mode 0755). All three
+        timestamps are set to the current time.
+        """
+        path = self._get_path(inode)
+        now_ns = int(time.time() * 1e9)  # one clock read for all timestamps
+
+        entry = pyfuse3.EntryAttributes()
+        entry.st_ino = inode
+        entry.st_uid = os.getuid()
+        entry.st_gid = os.getgid()
+        entry.st_atime_ns = now_ns
+        entry.st_mtime_ns = now_ns
+        entry.st_ctime_ns = now_ns
+
+        if self._is_virtual_file(path):
+            # Virtual files are readable text files. The size is only a
+            # placeholder: content is generated on read, and open() requests
+            # direct I/O so reads are not limited to this value.
+            entry.st_mode = stat.S_IFREG | 0o644
+            entry.st_size = 1024
+        else:
+            # Directories
+            entry.st_mode = stat.S_IFDIR | 0o755
+            entry.st_size = 0
+
+        return entry
+
+    async def lookup(self, parent_inode: int, name: bytes, ctx=None) -> pyfuse3.EntryAttributes:
+        """Look up ``name`` under ``parent_inode`` and return its attributes.
+
+        Any name resolves: an inode is allocated for paths not seen before.
+        """
+        child_inode = self._get_inode(self._child_path(parent_inode, name))
+        return await self.getattr(child_inode, ctx)
+
+    async def opendir(self, inode: int, ctx=None) -> int:
+        """Open a directory; the inode itself is used as the handle."""
+        return inode
+
+    async def readdir(self, inode: int, start_id: int, token) -> None:
+        """List directory contents, resuming at entry index ``start_id``.
+
+        Only the three virtual context files are listed; subdirectories are
+        not enumerated (they come into existence through lookup/mkdir).
+        """
+        path = self._get_path(inode)
+        names = (self.CONTEXT_FILE, self.CONTEXT_LIST_FILE, self.CONTEXT_PUSH_FILE)
+
+        for index, name in enumerate(names):
+            if index < start_id:
+                continue
+            attr = await self.getattr(self._get_inode(os.path.join(path, name)))
+            # index + 1 is the id at which a follow-up readdir resumes.
+            if not pyfuse3.readdir_reply(token, name.encode('utf-8'), attr, index + 1):
+                break
+
+    async def open(self, inode: int, flags: int, ctx=None) -> pyfuse3.FileInfo:
+        """Open a virtual file.
+
+        Returns:
+            A ``pyfuse3.FileInfo`` whose ``fh`` is the inode. ``direct_io``
+            is enabled because file content is generated per read and the
+            size reported by ``getattr`` is only a placeholder.
+
+        Raises:
+            FUSEError: ``EISDIR`` if the inode is not a virtual file.
+        """
+        path = self._get_path(inode)
+        if not self._is_virtual_file(path):
+            raise FUSEError(errno.EISDIR)
+        return pyfuse3.FileInfo(fh=inode, direct_io=True)
+
+    async def read(self, fh: int, offset: int, size: int) -> bytes:
+        """Return up to ``size`` bytes of a virtual file starting at ``offset``.
+
+        The full content is regenerated on every call and then sliced.
+        """
+        path = self._get_path(fh)
+        basename = os.path.basename(path)
+        dir_path = os.path.dirname(path)
+
+        if basename == self.CONTEXT_FILE:
+            # Aggregated context for the containing directory, one per line.
+            contexts = self.context_db.get_context_by_path(dir_path, depth=1)
+            content = "\n".join(f"[{ctx.path}] {ctx.content}" for ctx in contexts)
+
+        elif basename == self.CONTEXT_LIST_FILE:
+            # One summary line per context at the containing directory.
+            contexts = self.context_db.list_contexts_at_path(dir_path)
+            content = "\n".join(
+                f"ID: {ctx.id}, Path: {ctx.path}, Author: {ctx.author}, Created: {ctx.created_at}"
+                for ctx in contexts
+            )
+
+        elif basename == self.CONTEXT_PUSH_FILE:
+            # Instructions for pushing context
+            content = f"Write to this file to push context to path: {dir_path}\nFormat: <content>"
+
+        else:
+            content = "Unknown virtual file"
+
+        content_bytes = content.encode('utf-8')
+        return content_bytes[offset:offset + size]
+
+    async def write(self, fh: int, offset: int, data: bytes) -> int:
+        """Store ``data`` as a new context when written to ``.context_push``.
+
+        Each write call becomes its own context for the containing
+        directory, authored by "fuse_user"; ``offset`` is ignored.
+        Whitespace-only payloads are accepted but not stored.
+
+        Returns:
+            ``len(data)``, i.e. the whole buffer is reported as written.
+
+        Raises:
+            FUSEError: ``EACCES`` for any file other than ``.context_push``;
+                ``EINVAL`` if ``data`` is not valid UTF-8.
+        """
+        path = self._get_path(fh)
+        if os.path.basename(path) != self.CONTEXT_PUSH_FILE:
+            raise FUSEError(errno.EACCES)
+
+        try:
+            content = data.decode('utf-8').strip()
+        except UnicodeDecodeError:
+            raise FUSEError(errno.EINVAL) from None
+
+        if content:
+            self.context_db.store_context(
+                Context(
+                    id=None,
+                    path=os.path.dirname(path),
+                    content=content,
+                    author="fuse_user"
+                )
+            )
+        return len(data)
+
+    async def mkdir(self, parent_inode: int, name: bytes, mode: int, ctx=None) -> pyfuse3.EntryAttributes:
+        """Create directory (virtual - just for navigation).
+
+        Nothing is persisted: an inode is allocated for the new path and its
+        attributes are returned. ``mode`` is not used.
+        """
+        new_inode = self._get_inode(self._child_path(parent_inode, name))
+        return await self.getattr(new_inode, ctx)