package storage import ( "context" "encoding/json" "fmt" "sort" "strings" "sync" "time" "github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2/analysis/analyzer/standard" "github.com/blevesearch/bleve/v2/analysis/lang/en" "github.com/blevesearch/bleve/v2/mapping" "github.com/anthonyrawlins/bzzz/pkg/ucxl" slurpContext "github.com/anthonyrawlins/bzzz/pkg/slurp/context" ) // IndexManagerImpl implements the IndexManager interface using Bleve type IndexManagerImpl struct { mu sync.RWMutex indexes map[string]bleve.Index stats map[string]*IndexStatistics basePath string nodeID string options *IndexManagerOptions } // IndexManagerOptions configures index manager behavior type IndexManagerOptions struct { DefaultAnalyzer string `json:"default_analyzer"` MaxDocumentSize int64 `json:"max_document_size"` RefreshInterval time.Duration `json:"refresh_interval"` OptimizeInterval time.Duration `json:"optimize_interval"` EnableHighlighting bool `json:"enable_highlighting"` EnableFaceting bool `json:"enable_faceting"` BatchSize int `json:"batch_size"` MaxResults int `json:"max_results"` } // DefaultIndexManagerOptions returns sensible defaults func DefaultIndexManagerOptions() *IndexManagerOptions { return &IndexManagerOptions{ DefaultAnalyzer: "standard", MaxDocumentSize: 10 * 1024 * 1024, // 10MB RefreshInterval: 5 * time.Minute, OptimizeInterval: 1 * time.Hour, EnableHighlighting: true, EnableFaceting: true, BatchSize: 100, MaxResults: 1000, } } // NewIndexManager creates a new index manager func NewIndexManager(basePath, nodeID string, options *IndexManagerOptions) (*IndexManagerImpl, error) { if options == nil { options = DefaultIndexManagerOptions() } im := &IndexManagerImpl{ indexes: make(map[string]bleve.Index), stats: make(map[string]*IndexStatistics), basePath: basePath, nodeID: nodeID, options: options, } // Start background optimization if enabled if options.OptimizeInterval > 0 { go im.optimizationLoop() } return im, nil } // CreateIndex creates a search index for contexts func (im *IndexManagerImpl) CreateIndex( ctx context.Context, indexName string, config *IndexConfig, ) error { im.mu.Lock() defer im.mu.Unlock() // Check if index already exists if _, exists := im.indexes[indexName]; exists { return fmt.Errorf("index %s already exists", indexName) } // Create index mapping mapping, err := im.createIndexMapping(config) if err != nil { return fmt.Errorf("failed to create index mapping: %w", err) } // Create the index indexPath := fmt.Sprintf("%s/%s.bleve", im.basePath, indexName) index, err := bleve.New(indexPath, mapping) if err != nil { return fmt.Errorf("failed to create index: %w", err) } // Store the index im.indexes[indexName] = index im.stats[indexName] = &IndexStatistics{ Name: indexName, LastUpdate: time.Now(), LastOptimization: time.Now(), } return nil } // UpdateIndex updates search index with new data func (im *IndexManagerImpl) UpdateIndex( ctx context.Context, indexName string, key string, data interface{}, ) error { im.mu.RLock() index, exists := im.indexes[indexName] stats := im.stats[indexName] im.mu.RUnlock() if !exists { return fmt.Errorf("index %s does not exist", indexName) } // Create indexable document from context data doc, err := im.createIndexDocument(data) if err != nil { return fmt.Errorf("failed to create index document: %w", err) } // Check document size docSize := im.estimateDocumentSize(doc) if docSize > im.options.MaxDocumentSize { return fmt.Errorf("document too large: %d bytes exceeds limit of %d", docSize, im.options.MaxDocumentSize) } // Index the document start := time.Now() if err := index.Index(key, doc); err != nil { return fmt.Errorf("failed to index document: %w", err) } // Update statistics im.mu.Lock() stats.DocumentCount++ stats.LastUpdate = time.Now() stats.IndexSize += docSize updateTime := time.Since(start) // Update average indexing time if stats.AverageQueryTime == 0 { stats.AverageQueryTime = updateTime } else { stats.AverageQueryTime = time.Duration( float64(stats.AverageQueryTime)*0.9 + float64(updateTime)*0.1, ) } im.mu.Unlock() return nil } // DeleteFromIndex removes data from search index func (im *IndexManagerImpl) DeleteFromIndex( ctx context.Context, indexName string, key string, ) error { im.mu.RLock() index, exists := im.indexes[indexName] stats := im.stats[indexName] im.mu.RUnlock() if !exists { return fmt.Errorf("index %s does not exist", indexName) } // Delete the document if err := index.Delete(key); err != nil { return fmt.Errorf("failed to delete document: %w", err) } // Update statistics im.mu.Lock() if stats.DocumentCount > 0 { stats.DocumentCount-- } stats.LastUpdate = time.Now() im.mu.Unlock() return nil } // Search searches indexed data using query func (im *IndexManagerImpl) Search( ctx context.Context, indexName string, query *SearchQuery, ) (*SearchResults, error) { start := time.Now() defer func() { im.updateSearchStats(indexName, time.Since(start)) }() im.mu.RLock() index, exists := im.indexes[indexName] im.mu.RUnlock() if !exists { return nil, fmt.Errorf("index %s does not exist", indexName) } // Build search request searchRequest, err := im.buildSearchRequest(query) if err != nil { return nil, fmt.Errorf("failed to build search request: %w", err) } // Execute search searchResult, err := index.Search(searchRequest) if err != nil { return nil, fmt.Errorf("search failed: %w", err) } // Convert to our search results format results, err := im.convertSearchResults(searchResult, query) if err != nil { return nil, fmt.Errorf("failed to convert search results: %w", err) } return results, nil } // RebuildIndex rebuilds search index from stored data func (im *IndexManagerImpl) RebuildIndex( ctx context.Context, indexName string, ) error { im.mu.Lock() defer im.mu.Unlock() index, exists := im.indexes[indexName] if !exists { return fmt.Errorf("index %s does not exist", indexName) } // Close current index if err := index.Close(); err != nil { return fmt.Errorf("failed to close index: %w", err) } // Delete index files indexPath := fmt.Sprintf("%s/%s.bleve", im.basePath, indexName) // Note: In production, you'd want to use proper file system operations // to delete the index directory // Recreate index with same configuration // This is a simplified implementation - in practice you'd need to // recreate with the original configuration and re-index all documents mapping := bleve.NewIndexMapping() newIndex, err := bleve.New(indexPath, mapping) if err != nil { return fmt.Errorf("failed to recreate index: %w", err) } // Replace in memory im.indexes[indexName] = newIndex im.stats[indexName].DocumentCount = 0 im.stats[indexName].LastUpdate = time.Now() im.stats[indexName].LastOptimization = time.Now() return nil } // OptimizeIndex optimizes search index for performance func (im *IndexManagerImpl) OptimizeIndex( ctx context.Context, indexName string, ) error { im.mu.RLock() index, exists := im.indexes[indexName] stats := im.stats[indexName] im.mu.RUnlock() if !exists { return fmt.Errorf("index %s does not exist", indexName) } // Bleve doesn't have explicit optimization, but we can force a merge // This is a no-op for Bleve, but we update stats im.mu.Lock() stats.LastOptimization = time.Now() stats.FragmentationRatio = im.calculateFragmentationRatio(index) im.mu.Unlock() return nil } // GetIndexStats returns index statistics func (im *IndexManagerImpl) GetIndexStats( ctx context.Context, indexName string, ) (*IndexStatistics, error) { im.mu.RLock() stats, exists := im.stats[indexName] im.mu.RUnlock() if !exists { return nil, fmt.Errorf("index %s does not exist", indexName) } // Return a copy statsCopy := *stats return &statsCopy, nil } // ListIndexes lists all available indexes func (im *IndexManagerImpl) ListIndexes(ctx context.Context) ([]string, error) { im.mu.RLock() defer im.mu.RUnlock() var indexNames []string for name := range im.indexes { indexNames = append(indexNames, name) } sort.Strings(indexNames) return indexNames, nil } // Helper methods func (im *IndexManagerImpl) createIndexMapping(config *IndexConfig) (mapping.IndexMapping, error) { // Create a new index mapping indexMapping := bleve.NewIndexMapping() // Configure default analyzer analyzer := config.Analyzer if analyzer == "" { analyzer = im.options.DefaultAnalyzer } // Set document mapping docMapping := bleve.NewDocumentMapping() // Map context fields for _, field := range config.Fields { fieldMapping := bleve.NewTextFieldMapping() fieldMapping.Analyzer = analyzer fieldMapping.Store = true fieldMapping.Index = true if im.options.EnableHighlighting { fieldMapping.IncludeTermVectors = true } docMapping.AddFieldMappingsAt(field, fieldMapping) } // Add special fields for faceting if enabled if im.options.EnableFaceting { // Add tags as keyword field for faceting tagsMapping := bleve.NewKeywordFieldMapping() tagsMapping.Store = true tagsMapping.Index = true docMapping.AddFieldMappingsAt("tags_facet", tagsMapping) // Add technologies as keyword field for faceting techMapping := bleve.NewKeywordFieldMapping() techMapping.Store = true techMapping.Index = true docMapping.AddFieldMappingsAt("technologies_facet", techMapping) } // Set default document type indexMapping.DefaultMapping = docMapping // Configure analyzers if config.Language == "en" { indexMapping.DefaultAnalyzer = en.AnalyzerName } else { indexMapping.DefaultAnalyzer = standard.Name } return indexMapping, nil } func (im *IndexManagerImpl) createIndexDocument(data interface{}) (map[string]interface{}, error) { // Convert context node to indexable document contextNode, ok := data.(*slurpContext.ContextNode) if !ok { return nil, fmt.Errorf("unsupported data type for indexing") } doc := map[string]interface{}{ "path": contextNode.Path, "ucxl_address": contextNode.UCXLAddress.String(), "summary": contextNode.Summary, "purpose": contextNode.Purpose, "technologies": strings.Join(contextNode.Technologies, " "), "tags": strings.Join(contextNode.Tags, " "), "insights": strings.Join(contextNode.Insights, " "), "overrides_parent": contextNode.OverridesParent, "context_specificity": contextNode.ContextSpecificity, "applies_to_children": contextNode.AppliesToChildren, "rag_confidence": contextNode.RAGConfidence, "generated_at": contextNode.GeneratedAt, } // Add faceting fields if enabled if im.options.EnableFaceting { doc["tags_facet"] = contextNode.Tags doc["technologies_facet"] = contextNode.Technologies } // Create searchable content by combining key fields combinedContent := fmt.Sprintf("%s %s %s %s", contextNode.Summary, contextNode.Purpose, strings.Join(contextNode.Technologies, " "), strings.Join(contextNode.Insights, " "), ) doc["content"] = combinedContent return doc, nil } func (im *IndexManagerImpl) buildSearchRequest(query *SearchQuery) (*bleve.SearchRequest, error) { // Build Bleve search request from our search query var bleveQuery bleve.Query if query.Query == "" { // Match all query bleveQuery = bleve.NewMatchAllQuery() } else { // Text search query if query.FuzzyMatch { // Use fuzzy query bleveQuery = bleve.NewFuzzyQuery(query.Query) } else { // Use match query for better scoring bleveQuery = bleve.NewMatchQuery(query.Query) } } // Add filters var conjuncts []bleve.Query conjuncts = append(conjuncts, bleveQuery) // Technology filters if len(query.Technologies) > 0 { for _, tech := range query.Technologies { techQuery := bleve.NewTermQuery(tech) techQuery.SetField("technologies_facet") conjuncts = append(conjuncts, techQuery) } } // Tag filters if len(query.Tags) > 0 { for _, tag := range query.Tags { tagQuery := bleve.NewTermQuery(tag) tagQuery.SetField("tags_facet") conjuncts = append(conjuncts, tagQuery) } } // Combine all queries if len(conjuncts) > 1 { bleveQuery = bleve.NewConjunctionQuery(conjuncts...) } // Create search request searchRequest := bleve.NewSearchRequest(bleveQuery) // Set result options if query.Limit > 0 && query.Limit <= im.options.MaxResults { searchRequest.Size = query.Limit } else { searchRequest.Size = im.options.MaxResults } if query.Offset > 0 { searchRequest.From = query.Offset } // Enable highlighting if requested if query.HighlightTerms && im.options.EnableHighlighting { searchRequest.Highlight = bleve.NewHighlight() searchRequest.Highlight.AddField("content") searchRequest.Highlight.AddField("summary") searchRequest.Highlight.AddField("purpose") } // Add facets if requested if len(query.Facets) > 0 && im.options.EnableFaceting { searchRequest.Facets = make(bleve.FacetsRequest) for _, facet := range query.Facets { switch facet { case "technologies": searchRequest.Facets["technologies"] = bleve.NewFacetRequest("technologies_facet", 10) case "tags": searchRequest.Facets["tags"] = bleve.NewFacetRequest("tags_facet", 10) } } } // Set fields to return searchRequest.Fields = []string{"*"} return searchRequest, nil } func (im *IndexManagerImpl) convertSearchResults( searchResult *bleve.SearchResult, query *SearchQuery, ) (*SearchResults, error) { results := &SearchResults{ Query: query, Results: make([]*SearchResult, 0, len(searchResult.Hits)), TotalResults: int64(searchResult.Total), ProcessingTime: searchResult.Took, ProcessedAt: time.Now(), } // Convert hits for i, hit := range searchResult.Hits { searchHit := &SearchResult{ MatchScore: hit.Score, MatchedFields: make([]string, 0), Highlights: make(map[string][]string), Rank: i + 1, } // Extract matched fields from hit for field := range hit.Fields { searchHit.MatchedFields = append(searchHit.MatchedFields, field) } // Extract highlights for field, fragments := range hit.Fragments { searchHit.Highlights[field] = fragments } // Create context node from hit data (simplified) contextNode := &slurpContext.ContextNode{ Path: hit.Fields["path"].(string), Summary: hit.Fields["summary"].(string), Purpose: hit.Fields["purpose"].(string), } // Parse UCXL address if ucxlStr, ok := hit.Fields["ucxl_address"].(string); ok { if addr, err := ucxl.ParseAddress(ucxlStr); err == nil { contextNode.UCXLAddress = addr } } searchHit.Context = contextNode results.Results = append(results.Results, searchHit) } // Convert facets if len(searchResult.Facets) > 0 { results.Facets = make(map[string]map[string]int) for facetName, facetResult := range searchResult.Facets { facetCounts := make(map[string]int) for _, term := range facetResult.Terms { facetCounts[term.Term] = term.Count } results.Facets[facetName] = facetCounts } } return results, nil } func (im *IndexManagerImpl) estimateDocumentSize(doc map[string]interface{}) int64 { // Rough estimation of document size docBytes, _ := json.Marshal(doc) return int64(len(docBytes)) } func (im *IndexManagerImpl) calculateFragmentationRatio(index bleve.Index) float64 { // Simplified fragmentation calculation // In practice, this would analyze the index structure return 0.1 // Placeholder: 10% fragmentation } func (im *IndexManagerImpl) updateSearchStats(indexName string, duration time.Duration) { im.mu.Lock() defer im.mu.Unlock() stats, exists := im.stats[indexName] if !exists { return } stats.QueryCount++ if stats.AverageQueryTime == 0 { stats.AverageQueryTime = duration } else { // Exponential moving average stats.AverageQueryTime = time.Duration( float64(stats.AverageQueryTime)*0.9 + float64(duration)*0.1, ) } // Update success rate (simplified - assumes all queries succeed) stats.SuccessRate = 1.0 } func (im *IndexManagerImpl) optimizationLoop() { ticker := time.NewTicker(im.options.OptimizeInterval) defer ticker.Stop() for range ticker.C { im.performOptimization() } } func (im *IndexManagerImpl) performOptimization() { im.mu.RLock() indexNames := make([]string, 0, len(im.indexes)) for name := range im.indexes { indexNames = append(indexNames, name) } im.mu.RUnlock() // Optimize each index for _, indexName := range indexNames { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) if err := im.OptimizeIndex(ctx, indexName); err != nil { // Log error but continue with other indexes fmt.Printf("Failed to optimize index %s: %v\n", indexName, err) } cancel() } } // Close closes all indexes and cleans up resources func (im *IndexManagerImpl) Close() error { im.mu.Lock() defer im.mu.Unlock() for name, index := range im.indexes { if err := index.Close(); err != nil { fmt.Printf("Failed to close index %s: %v\n", name, err) } } im.indexes = make(map[string]bleve.Index) im.stats = make(map[string]*IndexStatistics) return nil }