//go:build slurp_full // +build slurp_full // Package distribution provides consistent hashing for distributed context placement package distribution import ( "crypto/sha256" "fmt" "sort" "sync" ) // ConsistentHashingImpl implements ConsistentHashing interface using SHA-256 based ring type ConsistentHashingImpl struct { mu sync.RWMutex ring map[uint32]string // hash -> node mapping sortedHashes []uint32 // sorted hash values virtualNodes int // number of virtual nodes per physical node nodes map[string]bool // set of physical nodes } // NewConsistentHashingImpl creates a new consistent hashing implementation func NewConsistentHashingImpl() (*ConsistentHashingImpl, error) { return &ConsistentHashingImpl{ ring: make(map[uint32]string), sortedHashes: []uint32{}, virtualNodes: 150, // Standard virtual node count for good distribution nodes: make(map[string]bool), }, nil } // AddNode adds a physical node to the consistent hash ring func (ch *ConsistentHashingImpl) AddNode(nodeID string) error { ch.mu.Lock() defer ch.mu.Unlock() if ch.nodes[nodeID] { return fmt.Errorf("node %s already exists", nodeID) } // Add virtual nodes for this physical node for i := 0; i < ch.virtualNodes; i++ { virtualNodeKey := fmt.Sprintf("%s:%d", nodeID, i) hash := ch.hashKey(virtualNodeKey) ch.ring[hash] = nodeID ch.sortedHashes = append(ch.sortedHashes, hash) } // Keep sorted hashes array sorted sort.Slice(ch.sortedHashes, func(i, j int) bool { return ch.sortedHashes[i] < ch.sortedHashes[j] }) ch.nodes[nodeID] = true return nil } // RemoveNode removes a physical node from the consistent hash ring func (ch *ConsistentHashingImpl) RemoveNode(nodeID string) error { ch.mu.Lock() defer ch.mu.Unlock() if !ch.nodes[nodeID] { return fmt.Errorf("node %s does not exist", nodeID) } // Remove all virtual nodes for this physical node newSortedHashes := []uint32{} for _, hash := range ch.sortedHashes { if ch.ring[hash] != nodeID { newSortedHashes = append(newSortedHashes, hash) } else { delete(ch.ring, hash) } } ch.sortedHashes = newSortedHashes delete(ch.nodes, nodeID) return nil } // GetNode returns the node responsible for a given key func (ch *ConsistentHashingImpl) GetNode(key string) (string, error) { ch.mu.RLock() defer ch.mu.RUnlock() if len(ch.ring) == 0 { return "", fmt.Errorf("no nodes available") } hash := ch.hashKey(key) // Find the first node with hash >= key hash idx := sort.Search(len(ch.sortedHashes), func(i int) bool { return ch.sortedHashes[i] >= hash }) // Wrap around if we've gone past the end if idx == len(ch.sortedHashes) { idx = 0 } return ch.ring[ch.sortedHashes[idx]], nil } // GetNodes returns multiple nodes responsible for a key (for replication) func (ch *ConsistentHashingImpl) GetNodes(key string, count int) ([]string, error) { ch.mu.RLock() defer ch.mu.RUnlock() if len(ch.nodes) == 0 { return nil, fmt.Errorf("no nodes available") } if count <= 0 { return []string{}, nil } // Don't return more nodes than we have if count > len(ch.nodes) { count = len(ch.nodes) } hash := ch.hashKey(key) nodes := []string{} seenNodes := make(map[string]bool) // Find the starting position idx := sort.Search(len(ch.sortedHashes), func(i int) bool { return ch.sortedHashes[i] >= hash }) // Collect unique physical nodes for len(nodes) < count && len(seenNodes) < len(ch.nodes) { if idx >= len(ch.sortedHashes) { idx = 0 } nodeID := ch.ring[ch.sortedHashes[idx]] if !seenNodes[nodeID] { nodes = append(nodes, nodeID) seenNodes[nodeID] = true } idx++ } return nodes, nil } // GetAllNodes returns all physical nodes in the ring func (ch *ConsistentHashingImpl) GetAllNodes() []string { ch.mu.RLock() defer ch.mu.RUnlock() nodes := make([]string, 0, len(ch.nodes)) for nodeID := range ch.nodes { nodes = append(nodes, nodeID) } return nodes } // GetNodeDistribution returns the distribution of keys across nodes func (ch *ConsistentHashingImpl) GetNodeDistribution() map[string]float64 { ch.mu.RLock() defer ch.mu.RUnlock() if len(ch.sortedHashes) == 0 { return map[string]float64{} } distribution := make(map[string]float64) totalSpace := uint64(1) << 32 // 2^32 for uint32 hash space // Calculate the range each node is responsible for for i, hash := range ch.sortedHashes { nodeID := ch.ring[hash] var rangeSize uint64 if i == len(ch.sortedHashes)-1 { // Last hash wraps around to first rangeSize = uint64(ch.sortedHashes[0]) + totalSpace - uint64(hash) } else { rangeSize = uint64(ch.sortedHashes[i+1]) - uint64(hash) } percentage := float64(rangeSize) / float64(totalSpace) * 100 distribution[nodeID] += percentage } return distribution } // GetRingStatus returns status information about the hash ring func (ch *ConsistentHashingImpl) GetRingStatus() *RingStatus { ch.mu.RLock() defer ch.mu.RUnlock() status := &RingStatus{ PhysicalNodes: len(ch.nodes), VirtualNodes: len(ch.ring), RingSize: len(ch.sortedHashes), Distribution: ch.GetNodeDistribution(), LoadBalance: ch.calculateLoadBalance(), } return status } // hashKey computes SHA-256 hash of a key and returns first 4 bytes as uint32 func (ch *ConsistentHashingImpl) hashKey(key string) uint32 { hash := sha256.Sum256([]byte(key)) return uint32(hash[0])<<24 | uint32(hash[1])<<16 | uint32(hash[2])<<8 | uint32(hash[3]) } // calculateLoadBalance calculates how well-balanced the load distribution is func (ch *ConsistentHashingImpl) calculateLoadBalance() float64 { if len(ch.nodes) <= 1 { return 1.0 // Perfect balance with 0 or 1 nodes } distribution := ch.GetNodeDistribution() idealPercentage := 100.0 / float64(len(ch.nodes)) // Calculate variance from ideal distribution totalVariance := 0.0 for _, percentage := range distribution { variance := percentage - idealPercentage totalVariance += variance * variance } avgVariance := totalVariance / float64(len(distribution)) // Convert to a balance score (higher is better, 1.0 is perfect) // Use 1/(1+variance) to map variance to [0,1] range return 1.0 / (1.0 + avgVariance/100.0) } // RingStatus represents the status of the consistent hash ring type RingStatus struct { PhysicalNodes int `json:"physical_nodes"` VirtualNodes int `json:"virtual_nodes"` RingSize int `json:"ring_size"` Distribution map[string]float64 `json:"distribution"` LoadBalance float64 `json:"load_balance"` } // ConsistentHashMetrics provides metrics about hash ring performance type ConsistentHashMetrics struct { TotalKeys int64 `json:"total_keys"` NodeUtilization map[string]float64 `json:"node_utilization"` RebalanceEvents int64 `json:"rebalance_events"` AverageSeekTime float64 `json:"average_seek_time_ms"` LoadBalanceScore float64 `json:"load_balance_score"` LastRebalanceTime int64 `json:"last_rebalance_time"` } // GetMetrics returns performance metrics for the hash ring func (ch *ConsistentHashingImpl) GetMetrics() *ConsistentHashMetrics { ch.mu.RLock() defer ch.mu.RUnlock() return &ConsistentHashMetrics{ TotalKeys: 0, // Would be maintained by usage tracking NodeUtilization: ch.GetNodeDistribution(), RebalanceEvents: 0, // Would be maintained by event tracking AverageSeekTime: 0.1, // Placeholder - would be measured LoadBalanceScore: ch.calculateLoadBalance(), LastRebalanceTime: 0, // Would be maintained by event tracking } } // Rehash rebuilds the entire hash ring (useful after configuration changes) func (ch *ConsistentHashingImpl) Rehash() error { ch.mu.Lock() defer ch.mu.Unlock() // Save current nodes currentNodes := make([]string, 0, len(ch.nodes)) for nodeID := range ch.nodes { currentNodes = append(currentNodes, nodeID) } // Clear the ring ch.ring = make(map[uint32]string) ch.sortedHashes = []uint32{} ch.nodes = make(map[string]bool) // Re-add all nodes for _, nodeID := range currentNodes { if err := ch.addNodeUnsafe(nodeID); err != nil { return fmt.Errorf("failed to re-add node %s during rehash: %w", nodeID, err) } } return nil } // addNodeUnsafe adds a node without locking (internal use only) func (ch *ConsistentHashingImpl) addNodeUnsafe(nodeID string) error { if ch.nodes[nodeID] { return fmt.Errorf("node %s already exists", nodeID) } // Add virtual nodes for this physical node for i := 0; i < ch.virtualNodes; i++ { virtualNodeKey := fmt.Sprintf("%s:%d", nodeID, i) hash := ch.hashKey(virtualNodeKey) ch.ring[hash] = nodeID ch.sortedHashes = append(ch.sortedHashes, hash) } // Keep sorted hashes array sorted sort.Slice(ch.sortedHashes, func(i, j int) bool { return ch.sortedHashes[i] < ch.sortedHashes[j] }) ch.nodes[nodeID] = true return nil } // SetVirtualNodeCount configures the number of virtual nodes per physical node func (ch *ConsistentHashingImpl) SetVirtualNodeCount(count int) error { if count <= 0 { return fmt.Errorf("virtual node count must be positive") } if count > 1000 { return fmt.Errorf("virtual node count too high (max 1000)") } ch.mu.Lock() defer ch.mu.Unlock() ch.virtualNodes = count // Rehash with new virtual node count return ch.Rehash() } // FindClosestNodes finds the N closest nodes to a given key in the ring func (ch *ConsistentHashingImpl) FindClosestNodes(key string, count int) ([]string, []uint32, error) { ch.mu.RLock() defer ch.mu.RUnlock() if len(ch.ring) == 0 { return nil, nil, fmt.Errorf("no nodes available") } if count <= 0 { return []string{}, []uint32{}, nil } keyHash := ch.hashKey(key) distances := []struct { nodeID string hash uint32 distance uint32 }{} // Calculate distances to all virtual nodes for hash, nodeID := range ch.ring { var distance uint32 if hash >= keyHash { distance = hash - keyHash } else { // Wrap around distance without overflowing 32-bit space distance = uint32((uint64(1)<<32 - uint64(keyHash)) + uint64(hash)) } distances = append(distances, struct { nodeID string hash uint32 distance uint32 }{nodeID, hash, distance}) } // Sort by distance sort.Slice(distances, func(i, j int) bool { return distances[i].distance < distances[j].distance }) // Collect unique nodes seen := make(map[string]bool) nodes := []string{} hashes := []uint32{} for _, d := range distances { if len(nodes) >= count { break } if !seen[d.nodeID] { nodes = append(nodes, d.nodeID) hashes = append(hashes, d.hash) seen[d.nodeID] = true } } return nodes, hashes, nil }