Files
HCFS/hcfs-python/hcfs/core/embeddings_trio.py
2025-07-30 09:34:16 +10:00

136 lines
4.9 KiB
Python

"""
Trio-compatible wrapper for OptimizedEmbeddingManager.
This module provides async compatibility for the optimized embedding system
to work with FUSE filesystem operations that require Trio async context.
"""
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

import trio

from .embeddings_optimized import OptimizedEmbeddingManager, VectorSearchResult
from .context_db import Context

if TYPE_CHECKING:
    # Imported for static analysis only: it lets type checkers and linters
    # resolve the 'np.ndarray' string annotations used in this module without
    # importing numpy at runtime here.
    import numpy as np
class TrioOptimizedEmbeddingManager:
    """
    Trio-compatible async wrapper for OptimizedEmbeddingManager.

    Every coroutine method forwards to the same-named method of the wrapped
    synchronous manager and runs it through ``trio.to_thread.run_sync`` so
    the call does not block the Trio event loop.

    Arguments are forwarded positionally, in the order they appear in each
    wrapper signature, because ``trio.to_thread.run_sync`` reserves keyword
    arguments for its own options.

    NOTE(review): the wrapped methods therefore execute in worker threads and
    receive their arguments by position. This assumes the sync manager is
    safe to call from threads other than the one that created it, and that
    its methods declare their parameters in the same order as the wrappers
    below -- confirm against OptimizedEmbeddingManager.
    """

    def __init__(self, sync_embedding_manager: OptimizedEmbeddingManager):
        """Store the synchronous manager that every method delegates to."""
        self.sync_manager = sync_embedding_manager

    async def _run_in_thread(self, sync_fn, *args) -> Any:
        """
        Await ``sync_fn(*args)`` in a Trio worker thread and return its result.

        Trio's default options are used (no custom limiter, no abandon on
        cancel), so a cancelled caller still waits for the thread to finish.
        Exceptions raised by ``sync_fn`` propagate to the awaiting task.
        """
        return await trio.to_thread.run_sync(sync_fn, *args)

    async def generate_embedding(
        self, text: str, use_cache: bool = True
    ) -> 'np.ndarray':
        """Generate the embedding for ``text`` asynchronously."""
        return await self._run_in_thread(
            self.sync_manager.generate_embedding, text, use_cache
        )

    async def generate_embeddings_batch(
        self, texts: List[str], use_cache: bool = True
    ) -> List['np.ndarray']:
        """Generate embeddings for multiple texts asynchronously."""
        return await self._run_in_thread(
            self.sync_manager.generate_embeddings_batch, texts, use_cache
        )

    async def store_embedding(
        self, context_id: int, embedding: 'np.ndarray'
    ) -> None:
        """Store ``embedding`` for ``context_id`` asynchronously."""
        # The sync manager's return value is deliberately discarded.
        await self._run_in_thread(
            self.sync_manager.store_embedding, context_id, embedding
        )

    async def store_embeddings_batch(
        self, context_embeddings: List[Tuple[int, 'np.ndarray']]
    ) -> None:
        """Store multiple ``(context_id, embedding)`` pairs asynchronously."""
        # The sync manager's return value is deliberately discarded.
        await self._run_in_thread(
            self.sync_manager.store_embeddings_batch, context_embeddings
        )

    async def get_embedding(self, context_id: int) -> Optional['np.ndarray']:
        """Retrieve the embedding stored for ``context_id`` asynchronously."""
        return await self._run_in_thread(
            self.sync_manager.get_embedding, context_id
        )

    async def semantic_search_optimized(
        self,
        query: str,
        path_prefix: Optional[str] = None,
        top_k: int = 5,
        include_contexts: bool = True,
    ) -> List[VectorSearchResult]:
        """Perform semantic search asynchronously."""
        return await self._run_in_thread(
            self.sync_manager.semantic_search_optimized,
            query,
            path_prefix,
            top_k,
            include_contexts,
        )

    async def hybrid_search_optimized(
        self,
        query: str,
        path_prefix: Optional[str] = None,
        top_k: int = 5,
        semantic_weight: float = 0.7,
        rerank_top_n: int = 50,
    ) -> List[VectorSearchResult]:
        """Perform hybrid search asynchronously."""
        return await self._run_in_thread(
            self.sync_manager.hybrid_search_optimized,
            query,
            path_prefix,
            top_k,
            semantic_weight,
            rerank_top_n,
        )

    async def vector_similarity_search(
        self,
        query_embedding: 'np.ndarray',
        context_ids: Optional[List[int]] = None,
        top_k: int = 10,
        min_similarity: float = 0.0,
    ) -> List[VectorSearchResult]:
        """Perform vector similarity search asynchronously."""
        return await self._run_in_thread(
            self.sync_manager.vector_similarity_search,
            query_embedding,
            context_ids,
            top_k,
            min_similarity,
        )

    async def build_embeddings_index(self, batch_size: int = 100) -> Dict[str, Any]:
        """Build the embeddings index asynchronously."""
        return await self._run_in_thread(
            self.sync_manager.build_embeddings_index, batch_size
        )

    async def get_statistics(self) -> Dict[str, Any]:
        """Get the sync manager's statistics asynchronously."""
        return await self._run_in_thread(self.sync_manager.get_statistics)

    async def cleanup_old_embeddings(self, days_old: int = 30) -> int:
        """Clean up old embeddings asynchronously."""
        return await self._run_in_thread(
            self.sync_manager.cleanup_old_embeddings, days_old
        )

    # Synchronous access to underlying manager properties.
    # These are plain attribute reads on the wrapped manager, so they are not
    # dispatched to a worker thread.
    @property
    def model_config(self):
        """Return the wrapped manager's ``model_config`` attribute."""
        return self.sync_manager.model_config

    @property
    def vector_cache(self):
        """Return the wrapped manager's ``vector_cache`` attribute."""
        return self.sync_manager.vector_cache

    @property
    def batch_size(self):
        """Return the wrapped manager's ``batch_size`` attribute."""
        return self.sync_manager.batch_size