// Package distribution provides gossip protocol for metadata synchronization package distribution import ( "context" "encoding/json" "fmt" "math/rand" "sync" "time" "github.com/anthonyrawlins/bzzz/pkg/dht" "github.com/anthonyrawlins/bzzz/pkg/config" "github.com/anthonyrawlins/bzzz/pkg/ucxl" ) // GossipProtocolImpl implements GossipProtocol interface for metadata synchronization type GossipProtocolImpl struct { mu sync.RWMutex dht *dht.DHT config *config.Config running bool gossipInterval time.Duration maxGossipPeers int compressionEnabled bool messageBuffer chan *GossipMessage state *GossipState stats *GossipStatistics metadataCache map[string]*ContextMetadata vectorClock map[string]int64 failureDetector *FailureDetector } // GossipMessage represents a message in the gossip protocol type GossipMessage struct { MessageID string `json:"message_id"` MessageType GossipMessageType `json:"message_type"` SenderID string `json:"sender_id"` Timestamp time.Time `json:"timestamp"` TTL int `json:"ttl"` VectorClock map[string]int64 `json:"vector_clock"` Payload map[string]interface{} `json:"payload"` Metadata *GossipMessageMetadata `json:"metadata"` } // GossipMessageType represents different types of gossip messages type GossipMessageType string const ( GossipMessageHeartbeat GossipMessageType = "heartbeat" GossipMessageMetadataSync GossipMessageType = "metadata_sync" GossipMessageContextUpdate GossipMessageType = "context_update" GossipMessagePeerDiscovery GossipMessageType = "peer_discovery" GossipMessageConflictAlert GossipMessageType = "conflict_alert" GossipMessageHealthCheck GossipMessageType = "health_check" ) // GossipMessageMetadata contains metadata about gossip messages type GossipMessageMetadata struct { Priority Priority `json:"priority"` Reliability bool `json:"reliability"` Encrypted bool `json:"encrypted"` Compressed bool `json:"compressed"` OriginalSize int `json:"original_size"` CompressionType string `json:"compression_type"` } // ContextMetadata represents metadata about a distributed context type ContextMetadata struct { Address ucxl.Address `json:"address"` Version int64 `json:"version"` LastUpdated time.Time `json:"last_updated"` UpdatedBy string `json:"updated_by"` Roles []string `json:"roles"` Size int64 `json:"size"` Checksum string `json:"checksum"` ReplicationNodes []string `json:"replication_nodes"` VectorClock map[string]int64 `json:"vector_clock"` Status MetadataStatus `json:"status"` } // MetadataStatus represents the status of context metadata type MetadataStatus string const ( MetadataStatusActive MetadataStatus = "active" MetadataStatusDeprecated MetadataStatus = "deprecated" MetadataStatusDeleted MetadataStatus = "deleted" MetadataStatusConflicted MetadataStatus = "conflicted" ) // FailureDetector detects failed nodes in the network type FailureDetector struct { mu sync.RWMutex suspectedNodes map[string]time.Time failedNodes map[string]time.Time heartbeatTimeout time.Duration failureThreshold time.Duration } // NewGossipProtocolImpl creates a new gossip protocol implementation func NewGossipProtocolImpl(dht *dht.DHT, config *config.Config) (*GossipProtocolImpl, error) { if dht == nil { return nil, fmt.Errorf("DHT instance is required") } if config == nil { return nil, fmt.Errorf("config is required") } gp := &GossipProtocolImpl{ dht: dht, config: config, running: false, gossipInterval: 30 * time.Second, maxGossipPeers: 5, compressionEnabled: true, messageBuffer: make(chan *GossipMessage, 1000), state: &GossipState{ Running: false, CurrentRound: 0, RoundStartTime: time.Now(), RoundDuration: 0, ActiveConnections: 0, PendingMessages: 0, NextRoundTime: time.Now().Add(30 * time.Second), ProtocolVersion: "v1.0", State: "stopped", }, stats: &GossipStatistics{ LastUpdated: time.Now(), }, metadataCache: make(map[string]*ContextMetadata), vectorClock: make(map[string]int64), failureDetector: &FailureDetector{ suspectedNodes: make(map[string]time.Time), failedNodes: make(map[string]time.Time), heartbeatTimeout: 60 * time.Second, failureThreshold: 120 * time.Second, }, } return gp, nil } // StartGossip begins gossip protocol for metadata synchronization func (gp *GossipProtocolImpl) StartGossip(ctx context.Context) error { gp.mu.Lock() if gp.running { gp.mu.Unlock() return fmt.Errorf("gossip protocol already running") } gp.running = true gp.state.Running = true gp.state.State = "running" gp.mu.Unlock() // Start background workers go gp.gossipWorker(ctx) go gp.messageProcessor(ctx) go gp.heartbeatSender(ctx) go gp.failureDetectorWorker(ctx) return nil } // StopGossip stops gossip protocol func (gp *GossipProtocolImpl) StopGossip(ctx context.Context) error { gp.mu.Lock() defer gp.mu.Unlock() if !gp.running { return fmt.Errorf("gossip protocol not running") } gp.running = false gp.state.Running = false gp.state.State = "stopped" close(gp.messageBuffer) return nil } // GossipMetadata exchanges metadata with peer nodes func (gp *GossipProtocolImpl) GossipMetadata(ctx context.Context, peer string) error { if !gp.running { return fmt.Errorf("gossip protocol not running") } // Create metadata sync message message := &GossipMessage{ MessageID: gp.generateMessageID(), MessageType: GossipMessageMetadataSync, SenderID: gp.config.Agent.ID, Timestamp: time.Now(), TTL: 3, // Max 3 hops VectorClock: gp.getVectorClock(), Payload: map[string]interface{}{ "metadata_cache": gp.getMetadataCacheSnapshot(), "request_sync": true, }, Metadata: &GossipMessageMetadata{ Priority: PriorityNormal, Reliability: true, Encrypted: false, Compressed: gp.compressionEnabled, }, } // Send to specific peer return gp.sendMessage(ctx, message, peer) } // GetGossipState returns current gossip protocol state func (gp *GossipProtocolImpl) GetGossipState() (*GossipState, error) { gp.mu.RLock() defer gp.mu.RUnlock() // Update dynamic state gp.state.ActiveConnections = len(gp.dht.GetConnectedPeers()) gp.state.PendingMessages = len(gp.messageBuffer) return gp.state, nil } // SetGossipInterval configures gossip frequency func (gp *GossipProtocolImpl) SetGossipInterval(interval time.Duration) error { if interval < time.Second { return fmt.Errorf("gossip interval too short (minimum 1 second)") } if interval > time.Hour { return fmt.Errorf("gossip interval too long (maximum 1 hour)") } gp.mu.Lock() gp.gossipInterval = interval gp.state.NextRoundTime = time.Now().Add(interval) gp.mu.Unlock() return nil } // GetGossipStats returns gossip protocol statistics func (gp *GossipProtocolImpl) GetGossipStats() (*GossipStatistics, error) { gp.mu.RLock() defer gp.mu.RUnlock() // Update real-time stats gp.stats.ActivePeers = len(gp.dht.GetConnectedPeers()) gp.stats.LastGossipTime = time.Now() gp.stats.LastUpdated = time.Now() return gp.stats, nil } // Background workers func (gp *GossipProtocolImpl) gossipWorker(ctx context.Context) { ticker := time.NewTicker(gp.gossipInterval) defer ticker.Stop() for { select { case <-ctx.Done(): return case <-ticker.C: if gp.running { gp.performGossipRound(ctx) } } } } func (gp *GossipProtocolImpl) messageProcessor(ctx context.Context) { for { select { case <-ctx.Done(): return case message := <-gp.messageBuffer: if message == nil { return // Channel closed } gp.processIncomingMessage(ctx, message) } } } func (gp *GossipProtocolImpl) heartbeatSender(ctx context.Context) { ticker := time.NewTicker(30 * time.Second) defer ticker.Stop() for { select { case <-ctx.Done(): return case <-ticker.C: if gp.running { gp.sendHeartbeat(ctx) } } } } func (gp *GossipProtocolImpl) failureDetectorWorker(ctx context.Context) { ticker := time.NewTicker(60 * time.Second) defer ticker.Stop() for { select { case <-ctx.Done(): return case <-ticker.C: if gp.running { gp.detectFailures() } } } } // Core gossip operations func (gp *GossipProtocolImpl) performGossipRound(ctx context.Context) { start := time.Now() gp.mu.Lock() gp.state.CurrentRound++ gp.state.RoundStartTime = start gp.stats.GossipRounds++ gp.mu.Unlock() // Select random peers for gossip peers := gp.selectGossipPeers() // Perform gossip with selected peers for _, peer := range peers { go func(peerID string) { if err := gp.GossipMetadata(ctx, peerID); err != nil { gp.mu.Lock() gp.stats.NetworkErrors++ gp.mu.Unlock() } }(peer) } // Update round duration gp.mu.Lock() gp.state.RoundDuration = time.Since(start) gp.state.NextRoundTime = time.Now().Add(gp.gossipInterval) gp.stats.AverageRoundTime = (gp.stats.AverageRoundTime + gp.state.RoundDuration) / 2 gp.mu.Unlock() } func (gp *GossipProtocolImpl) selectGossipPeers() []string { connectedPeers := gp.dht.GetConnectedPeers() if len(connectedPeers) == 0 { return []string{} } // Randomly select up to maxGossipPeers selectedCount := min(len(connectedPeers), gp.maxGossipPeers) selected := make([]string, 0, selectedCount) // Simple random selection perm := rand.Perm(len(connectedPeers)) for i := 0; i < selectedCount; i++ { selected = append(selected, connectedPeers[perm[i]].String()) } return selected } func (gp *GossipProtocolImpl) processIncomingMessage(ctx context.Context, message *GossipMessage) { // Update vector clock gp.updateVectorClock(message.VectorClock) // Process based on message type switch message.MessageType { case GossipMessageHeartbeat: gp.processHeartbeat(message) case GossipMessageMetadataSync: gp.processMetadataSync(ctx, message) case GossipMessageContextUpdate: gp.processContextUpdate(message) case GossipMessagePeerDiscovery: gp.processPeerDiscovery(message) case GossipMessageConflictAlert: gp.processConflictAlert(message) case GossipMessageHealthCheck: gp.processHealthCheck(message) default: gp.mu.Lock() gp.stats.ProtocolErrors++ gp.mu.Unlock() } // Update statistics gp.mu.Lock() gp.stats.MessagesReceived++ gp.mu.Unlock() } func (gp *GossipProtocolImpl) sendMessage(ctx context.Context, message *GossipMessage, peer string) error { // Serialize message messageBytes, err := json.Marshal(message) if err != nil { return fmt.Errorf("failed to serialize message: %w", err) } // Compress if enabled if gp.compressionEnabled && message.Metadata != nil { compressedBytes, err := gp.compressMessage(messageBytes) if err == nil { message.Metadata.Compressed = true message.Metadata.OriginalSize = len(messageBytes) message.Metadata.CompressionType = "gzip" messageBytes = compressedBytes } } // Send via DHT (in a real implementation, this would use direct peer connections) key := fmt.Sprintf("gossip:%s:%s", peer, message.MessageID) if err := gp.dht.PutValue(ctx, key, messageBytes); err != nil { gp.mu.Lock() gp.stats.MessagesDropped++ gp.mu.Unlock() return fmt.Errorf("failed to send gossip message: %w", err) } gp.mu.Lock() gp.stats.MessagesSent++ gp.mu.Unlock() return nil } func (gp *GossipProtocolImpl) sendHeartbeat(ctx context.Context) { message := &GossipMessage{ MessageID: gp.generateMessageID(), MessageType: GossipMessageHeartbeat, SenderID: gp.config.Agent.ID, Timestamp: time.Now(), TTL: 1, // Heartbeats don't propagate VectorClock: gp.getVectorClock(), Payload: map[string]interface{}{ "status": "alive", "load": gp.calculateNodeLoad(), "version": "1.0.0", "capabilities": []string{"context_distribution", "replication"}, }, Metadata: &GossipMessageMetadata{ Priority: PriorityHigh, Reliability: false, // Heartbeats can be lost Encrypted: false, Compressed: false, }, } // Send to all connected peers peers := gp.selectGossipPeers() for _, peer := range peers { go func(peerID string) { gp.sendMessage(ctx, message, peerID) }(peer) } } func (gp *GossipProtocolImpl) detectFailures() { now := time.Now() gp.failureDetector.mu.Lock() defer gp.failureDetector.mu.Unlock() // Check for suspected nodes that haven't responded for nodeID, suspectedTime := range gp.failureDetector.suspectedNodes { if now.Sub(suspectedTime) > gp.failureDetector.failureThreshold { // Mark as failed gp.failureDetector.failedNodes[nodeID] = now delete(gp.failureDetector.suspectedNodes, nodeID) } } // Clean up old failure records for nodeID, failedTime := range gp.failureDetector.failedNodes { if now.Sub(failedTime) > 24*time.Hour { delete(gp.failureDetector.failedNodes, nodeID) } } } // Message processing handlers func (gp *GossipProtocolImpl) processHeartbeat(message *GossipMessage) { // Remove from suspected/failed lists if present gp.failureDetector.mu.Lock() delete(gp.failureDetector.suspectedNodes, message.SenderID) delete(gp.failureDetector.failedNodes, message.SenderID) gp.failureDetector.mu.Unlock() // Update peer information if load, ok := message.Payload["load"].(float64); ok { // Store peer load information _ = load } } func (gp *GossipProtocolImpl) processMetadataSync(ctx context.Context, message *GossipMessage) { // Extract metadata cache from payload if metadataCache, ok := message.Payload["metadata_cache"].(map[string]interface{}); ok { gp.mergeMetadataCache(metadataCache) } // If this is a sync request, respond with our metadata if requestSync, ok := message.Payload["request_sync"].(bool); ok && requestSync { responseMessage := &GossipMessage{ MessageID: gp.generateMessageID(), MessageType: GossipMessageMetadataSync, SenderID: gp.config.Agent.ID, Timestamp: time.Now(), TTL: 1, VectorClock: gp.getVectorClock(), Payload: map[string]interface{}{ "metadata_cache": gp.getMetadataCacheSnapshot(), "request_sync": false, }, Metadata: &GossipMessageMetadata{ Priority: PriorityNormal, Reliability: true, Encrypted: false, Compressed: gp.compressionEnabled, }, } go func() { gp.sendMessage(ctx, responseMessage, message.SenderID) }() } } func (gp *GossipProtocolImpl) processContextUpdate(message *GossipMessage) { // Handle context update notifications if address, ok := message.Payload["address"].(string); ok { if version, ok := message.Payload["version"].(float64); ok { gp.updateContextMetadata(address, int64(version), message.SenderID) } } } func (gp *GossipProtocolImpl) processPeerDiscovery(message *GossipMessage) { // Handle peer discovery messages if peers, ok := message.Payload["peers"].([]interface{}); ok { for _, peerData := range peers { if peer, ok := peerData.(string); ok { // Add discovered peer to our peer list _ = peer } } } } func (gp *GossipProtocolImpl) processConflictAlert(message *GossipMessage) { // Handle conflict alert messages if address, ok := message.Payload["address"].(string); ok { // Mark context as conflicted in our metadata cache gp.mu.Lock() if metadata, exists := gp.metadataCache[address]; exists { metadata.Status = MetadataStatusConflicted } gp.mu.Unlock() } } func (gp *GossipProtocolImpl) processHealthCheck(message *GossipMessage) { // Respond to health check with our status // Implementation would send back health information } // Helper methods func (gp *GossipProtocolImpl) generateMessageID() string { return fmt.Sprintf("%s-%d", gp.config.Agent.ID, time.Now().UnixNano()) } func (gp *GossipProtocolImpl) getVectorClock() map[string]int64 { gp.mu.RLock() defer gp.mu.RUnlock() clock := make(map[string]int64) for nodeID, timestamp := range gp.vectorClock { clock[nodeID] = timestamp } clock[gp.config.Agent.ID] = time.Now().Unix() return clock } func (gp *GossipProtocolImpl) updateVectorClock(remoteClock map[string]int64) { gp.mu.Lock() defer gp.mu.Unlock() for nodeID, timestamp := range remoteClock { if existingTimestamp, exists := gp.vectorClock[nodeID]; !exists || timestamp > existingTimestamp { gp.vectorClock[nodeID] = timestamp } } } func (gp *GossipProtocolImpl) getMetadataCacheSnapshot() map[string]*ContextMetadata { gp.mu.RLock() defer gp.mu.RUnlock() snapshot := make(map[string]*ContextMetadata) for address, metadata := range gp.metadataCache { // Deep copy metadata snapshot[address] = &ContextMetadata{ Address: metadata.Address, Version: metadata.Version, LastUpdated: metadata.LastUpdated, UpdatedBy: metadata.UpdatedBy, Roles: append([]string{}, metadata.Roles...), Size: metadata.Size, Checksum: metadata.Checksum, ReplicationNodes: append([]string{}, metadata.ReplicationNodes...), VectorClock: make(map[string]int64), Status: metadata.Status, } for k, v := range metadata.VectorClock { snapshot[address].VectorClock[k] = v } } return snapshot } func (gp *GossipProtocolImpl) mergeMetadataCache(remoteCache map[string]interface{}) { gp.mu.Lock() defer gp.mu.Unlock() // Simplified merge logic - in production would be more sophisticated for address, metadataInterface := range remoteCache { if metadataMap, ok := metadataInterface.(map[string]interface{}); ok { // Convert map to ContextMetadata struct // This is simplified - production code would use proper deserialization if version, ok := metadataMap["version"].(float64); ok { if existing, exists := gp.metadataCache[address]; !exists || int64(version) > existing.Version { // Update with newer version // Implementation would properly deserialize the metadata } } } } } func (gp *GossipProtocolImpl) updateContextMetadata(address string, version int64, updatedBy string) { gp.mu.Lock() defer gp.mu.Unlock() if existing, exists := gp.metadataCache[address]; exists && version > existing.Version { existing.Version = version existing.LastUpdated = time.Now() existing.UpdatedBy = updatedBy } } func (gp *GossipProtocolImpl) calculateNodeLoad() float64 { // Calculate current node load (simplified) gp.mu.RLock() metadataCount := len(gp.metadataCache) gp.mu.RUnlock() return float64(metadataCount) / 100.0 // Normalize to [0,1] range } func (gp *GossipProtocolImpl) compressMessage(data []byte) ([]byte, error) { // Simplified compression - would use actual compression in production return data, nil } // min returns the minimum of two integers func min(a, b int) int { if a < b { return a } return b }