Phase 2 build initial
This commit is contained in:
		
							
								
								
									
										136
									
								
								hcfs-python/hcfs/core/embeddings_trio.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										136
									
								
								hcfs-python/hcfs/core/embeddings_trio.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,136 @@ | ||||
| """ | ||||
| Trio-compatible wrapper for OptimizedEmbeddingManager. | ||||
|  | ||||
| This module provides async compatibility for the optimized embedding system | ||||
| to work with FUSE filesystem operations that require Trio async context. | ||||
| """ | ||||
|  | ||||
| import trio | ||||
| from typing import List, Dict, Optional, Tuple, Any | ||||
|  | ||||
| from .embeddings_optimized import OptimizedEmbeddingManager, VectorSearchResult | ||||
| from .context_db import Context | ||||
|  | ||||
|  | ||||
class TrioOptimizedEmbeddingManager:
    """
    Trio-compatible async wrapper for OptimizedEmbeddingManager.

    Each coroutine below forwards its arguments positionally to the
    same-named method on the wrapped synchronous manager through
    ``trio.to_thread.run_sync`` and returns whatever that call returns.
    The properties at the bottom read straight from the wrapped manager
    without leaving the calling task.

    NOTE(review): the wrapped manager's methods are invoked via
    ``trio.to_thread.run_sync``, so the manager presumably has to tolerate
    being called off the Trio thread -- confirm against its implementation.
    """

    def __init__(self, sync_embedding_manager: 'OptimizedEmbeddingManager'):
        """Keep a reference to the synchronous manager being wrapped."""
        self.sync_manager = sync_embedding_manager

    async def _offload(self, sync_method, *args: Any) -> Any:
        """Await ``sync_method(*args)`` via ``trio.to_thread.run_sync``."""
        return await trio.to_thread.run_sync(sync_method, *args)

    async def generate_embedding(self, text: str, use_cache: bool = True) -> 'np.ndarray':
        """Generate embedding asynchronously."""
        return await self._offload(
            self.sync_manager.generate_embedding, text, use_cache
        )

    async def generate_embeddings_batch(self, texts: List[str], use_cache: bool = True) -> List['np.ndarray']:
        """Generate embeddings for multiple texts asynchronously."""
        return await self._offload(
            self.sync_manager.generate_embeddings_batch, texts, use_cache
        )

    async def store_embedding(self, context_id: int, embedding: 'np.ndarray') -> None:
        """Store embedding asynchronously; the wrapped call's result is discarded."""
        await self._offload(
            self.sync_manager.store_embedding, context_id, embedding
        )

    async def store_embeddings_batch(self, context_embeddings: List[Tuple[int, 'np.ndarray']]) -> None:
        """Store multiple embeddings asynchronously; the wrapped call's result is discarded."""
        await self._offload(
            self.sync_manager.store_embeddings_batch, context_embeddings
        )

    async def get_embedding(self, context_id: int) -> Optional['np.ndarray']:
        """Retrieve embedding asynchronously."""
        return await self._offload(
            self.sync_manager.get_embedding, context_id
        )

    async def semantic_search_optimized(self,
                                        query: str,
                                        path_prefix: Optional[str] = None,
                                        top_k: int = 5,
                                        include_contexts: bool = True) -> List['VectorSearchResult']:
        """Perform semantic search asynchronously."""
        return await self._offload(
            self.sync_manager.semantic_search_optimized,
            query,
            path_prefix,
            top_k,
            include_contexts,
        )

    async def hybrid_search_optimized(self,
                                      query: str,
                                      path_prefix: Optional[str] = None,
                                      top_k: int = 5,
                                      semantic_weight: float = 0.7,
                                      rerank_top_n: int = 50) -> List['VectorSearchResult']:
        """Perform hybrid search asynchronously."""
        return await self._offload(
            self.sync_manager.hybrid_search_optimized,
            query,
            path_prefix,
            top_k,
            semantic_weight,
            rerank_top_n,
        )

    async def vector_similarity_search(self,
                                       query_embedding: 'np.ndarray',
                                       context_ids: Optional[List[int]] = None,
                                       top_k: int = 10,
                                       min_similarity: float = 0.0) -> List['VectorSearchResult']:
        """Perform vector similarity search asynchronously."""
        return await self._offload(
            self.sync_manager.vector_similarity_search,
            query_embedding,
            context_ids,
            top_k,
            min_similarity,
        )

    async def build_embeddings_index(self, batch_size: int = 100) -> Dict[str, Any]:
        """Build embeddings index asynchronously."""
        return await self._offload(
            self.sync_manager.build_embeddings_index, batch_size
        )

    async def get_statistics(self) -> Dict[str, Any]:
        """Get statistics asynchronously."""
        return await self._offload(self.sync_manager.get_statistics)

    async def cleanup_old_embeddings(self, days_old: int = 30) -> int:
        """Clean up old embeddings asynchronously."""
        return await self._offload(
            self.sync_manager.cleanup_old_embeddings, days_old
        )

    # Synchronous access to underlying manager properties
    @property
    def model_config(self) -> Any:
        """Return the wrapped manager's ``model_config`` attribute."""
        return self.sync_manager.model_config

    @property
    def vector_cache(self) -> Any:
        """Return the wrapped manager's ``vector_cache`` attribute."""
        return self.sync_manager.vector_cache

    @property
    def batch_size(self) -> Any:
        """Return the wrapped manager's ``batch_size`` attribute."""
        return self.sync_manager.batch_size
		Reference in New Issue
	
	Block a user
	 Claude Code
					Claude Code