Complete BZZZ functionality port to CHORUS

🎭 CHORUS now contains full BZZZ functionality adapted for containers

Core systems ported:
- P2P networking (libp2p with DHT and PubSub)
- Task coordination (COOEE protocol)
- HMMM collaborative reasoning
- SHHH encryption and security
- SLURP admin election system
- UCXL content addressing
- UCXI server integration
- Hypercore logging system
- Health monitoring and graceful shutdown
- License validation with KACHING

Container adaptations:
- Environment variable configuration (no YAML files)
- Container-optimized logging to stdout/stderr
- Auto-generated agent IDs for container deployments
- Docker-first architecture

All proven BZZZ P2P protocols, AI integration, and collaboration
features are now available in containerized form.
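
For example, container-first configuration can be read from environment variables with the standard library alone; a minimal sketch (the CHORUS_* variable names and the default URL below are assumptions, not the actual keys):

package main

import (
	"crypto/rand"
	"encoding/hex"
	"fmt"
	"os"
)

// envOr returns the value of key, or fallback when the variable is unset.
func envOr(key, fallback string) string {
	if v := os.Getenv(key); v != "" {
		return v
	}
	return fallback
}

func main() {
	agentID := os.Getenv("CHORUS_AGENT_ID")
	if agentID == "" {
		buf := make([]byte, 4)
		if _, err := rand.Read(buf); err == nil {
			agentID = "chorus-" + hex.EncodeToString(buf) // auto-generated agent ID for containers
		}
	}
	slurpURL := envOr("CHORUS_SLURP_URL", "http://slurp:8080")
	fmt.Printf("agent=%s slurp=%s\n", agentID, slurpURL) // container logging goes to stdout
}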

Next: Build and test container deployment.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Author: anthonyrawlins
Date: 2025-09-02 20:02:37 +10:00
Commit: 543ab216f9 (parent 7c6cbd562a)
224 changed files with 86331 additions and 186 deletions


@@ -0,0 +1,313 @@
package integration
import (
"context"
"crypto/sha256"
"encoding/json"
"fmt"
"log"
"time"
"chorus.services/bzzz/pkg/dht"
"chorus.services/bzzz/pkg/ucxl"
)
// DecisionPublisher handles publishing decisions to encrypted DHT storage
type DecisionPublisher struct {
dhtStorage *dht.EncryptedDHTStorage
enabled bool
}
// Decision represents a decision made from a HMMM discussion
type Decision struct {
Type string `json:"type"` // Event type (approval, warning, etc.)
Content string `json:"content"` // Human-readable decision content
Participants []string `json:"participants"` // Who participated in the decision
ConsensusLevel float64 `json:"consensus_level"` // Strength of consensus (0.0-1.0)
Timestamp time.Time `json:"timestamp"` // When decision was made
DiscussionID string `json:"discussion_id"` // Source discussion ID
Confidence float64 `json:"confidence"` // AI confidence in decision extraction
Metadata map[string]interface{} `json:"metadata"` // Additional decision metadata
UCXLAddress string `json:"ucxl_address"` // Associated UCXL address
ExpiresAt *time.Time `json:"expires_at,omitempty"` // Optional expiration
Tags []string `json:"tags"` // Decision tags
RelatedDecisions []string `json:"related_decisions,omitempty"` // Related decision hashes
}
// PublishResult contains the result of publishing a decision
type PublishResult struct {
UCXLAddress string `json:"ucxl_address"`
DHTHash string `json:"dht_hash"`
Success bool `json:"success"`
PublishedAt time.Time `json:"published_at"`
Error string `json:"error,omitempty"`
}
// NewDecisionPublisher creates a new decision publisher
func NewDecisionPublisher(dhtStorage *dht.EncryptedDHTStorage, enabled bool) *DecisionPublisher {
return &DecisionPublisher{
dhtStorage: dhtStorage,
enabled: enabled,
}
}
// PublishDecision publishes a decision to the encrypted DHT storage
func (dp *DecisionPublisher) PublishDecision(ctx context.Context, ucxlAddr *ucxl.Address, decision *Decision) (*PublishResult, error) {
result := &PublishResult{
UCXLAddress: ucxlAddr.String(),
PublishedAt: time.Now(),
}
if !dp.enabled {
result.Error = "Decision publishing is disabled"
log.Printf("📤 Decision publishing skipped (disabled): %s", ucxlAddr.String())
return result, nil
}
// Enrich decision with UCXL address
decision.UCXLAddress = ucxlAddr.String()
// Serialize decision to JSON
decisionJSON, err := json.Marshal(decision)
if err != nil {
result.Error = fmt.Sprintf("failed to serialize decision: %v", err)
return result, fmt.Errorf("failed to serialize decision: %w", err)
}
// Determine creator role from UCXL address
creatorRole := ucxlAddr.Role
if creatorRole == "any" || creatorRole == "" {
creatorRole = "contributor" // Default role for decisions
}
// Store in encrypted DHT
err = dp.dhtStorage.StoreUCXLContent(
ucxlAddr.String(),
decisionJSON,
creatorRole,
"decision",
)
if err != nil {
result.Error = err.Error()
return result, fmt.Errorf("failed to store decision in DHT: %w", err)
}
// Generate content hash for reference
result.DHTHash = fmt.Sprintf("sha256:%x", sha256.Sum256(decisionJSON))
result.Success = true
log.Printf("📤 Decision published to DHT: %s (hash: %s)", ucxlAddr.String(), result.DHTHash[:16]+"...")
return result, nil
}
// RetrieveDecision retrieves a decision from the encrypted DHT storage
func (dp *DecisionPublisher) RetrieveDecision(ctx context.Context, ucxlAddr *ucxl.Address) (*Decision, error) {
if !dp.enabled {
return nil, fmt.Errorf("decision publishing is disabled")
}
// Retrieve from encrypted DHT
content, metadata, err := dp.dhtStorage.RetrieveUCXLContent(ucxlAddr.String())
if err != nil {
return nil, fmt.Errorf("failed to retrieve decision from DHT: %w", err)
}
// Verify content type
if metadata.ContentType != "decision" {
return nil, fmt.Errorf("content at address is not a decision (type: %s)", metadata.ContentType)
}
// Deserialize decision
var decision Decision
if err := json.Unmarshal(content, &decision); err != nil {
return nil, fmt.Errorf("failed to deserialize decision: %w", err)
}
log.Printf("📥 Decision retrieved from DHT: %s", ucxlAddr.String())
return &decision, nil
}
// ListDecisionsByRole lists decisions accessible by a specific role
func (dp *DecisionPublisher) ListDecisionsByRole(ctx context.Context, role string, limit int) ([]*Decision, error) {
if !dp.enabled {
return nil, fmt.Errorf("decision publishing is disabled")
}
// Get content metadata from DHT
metadataList, err := dp.dhtStorage.ListContentByRole(role, limit)
if err != nil {
return nil, fmt.Errorf("failed to list content by role: %w", err)
}
decisions := make([]*Decision, 0)
// Retrieve each decision
for _, metadata := range metadataList {
if metadata.ContentType != "decision" {
continue // Skip non-decisions
}
// Parse UCXL address
addr, err := ucxl.Parse(metadata.Address)
if err != nil {
log.Printf("⚠️ Invalid UCXL address in decision metadata: %s", metadata.Address)
continue
}
// Retrieve decision content
decision, err := dp.RetrieveDecision(ctx, addr)
if err != nil {
log.Printf("⚠️ Failed to retrieve decision %s: %v", metadata.Address, err)
continue
}
decisions = append(decisions, decision)
// Respect limit
if len(decisions) >= limit {
break
}
}
log.Printf("📋 Listed %d decisions for role: %s", len(decisions), role)
return decisions, nil
}
// UpdateDecision updates an existing decision or creates a new version
func (dp *DecisionPublisher) UpdateDecision(ctx context.Context, ucxlAddr *ucxl.Address, decision *Decision) (*PublishResult, error) {
if !dp.enabled {
result := &PublishResult{
UCXLAddress: ucxlAddr.String(),
PublishedAt: time.Now(),
Error: "Decision publishing is disabled",
}
return result, nil
}
// Check if decision already exists
existingDecision, err := dp.RetrieveDecision(ctx, ucxlAddr)
if err == nil {
// Decision exists, create related decision reference
decision.RelatedDecisions = append(decision.RelatedDecisions, dp.generateDecisionHash(existingDecision))
log.Printf("📝 Updating existing decision: %s", ucxlAddr.String())
} else {
log.Printf("📝 Creating new decision: %s", ucxlAddr.String())
}
// Publish the updated/new decision
return dp.PublishDecision(ctx, ucxlAddr, decision)
}
// SearchDecisions searches for decisions matching criteria
func (dp *DecisionPublisher) SearchDecisions(ctx context.Context, searchCriteria map[string]string, limit int) ([]*Decision, error) {
if !dp.enabled {
return nil, fmt.Errorf("decision publishing is disabled")
}
// Convert search criteria to DHT search query
query := &dht.SearchQuery{
Agent: searchCriteria["agent"],
Role: searchCriteria["role"],
Project: searchCriteria["project"],
Task: searchCriteria["task"],
ContentType: "decision",
Limit: limit,
}
// Parse time filters if provided
if createdAfter := searchCriteria["created_after"]; createdAfter != "" {
if t, err := time.Parse(time.RFC3339, createdAfter); err == nil {
query.CreatedAfter = t
}
}
if createdBefore := searchCriteria["created_before"]; createdBefore != "" {
if t, err := time.Parse(time.RFC3339, createdBefore); err == nil {
query.CreatedBefore = t
}
}
// Search DHT for matching decisions
searchResults, err := dp.dhtStorage.SearchContent(query)
if err != nil {
return nil, fmt.Errorf("failed to search decisions: %w", err)
}
decisions := make([]*Decision, 0, len(searchResults))
// Retrieve each decision
for _, metadata := range searchResults {
// Parse UCXL address
addr, err := ucxl.Parse(metadata.Address)
if err != nil {
log.Printf("⚠️ Invalid UCXL address in search results: %s", metadata.Address)
continue
}
// Retrieve decision content
decision, err := dp.RetrieveDecision(ctx, addr)
if err != nil {
log.Printf("⚠️ Failed to retrieve decision %s: %v", metadata.Address, err)
continue
}
decisions = append(decisions, decision)
}
log.Printf("🔍 Search found %d decisions", len(decisions))
return decisions, nil
}
// GetDecisionMetrics returns metrics about decisions in the system
func (dp *DecisionPublisher) GetDecisionMetrics(ctx context.Context) (map[string]interface{}, error) {
if !dp.enabled {
return map[string]interface{}{
"enabled": false,
"message": "Decision publishing is disabled",
}, nil
}
// Get DHT storage metrics
dhtMetrics := dp.dhtStorage.GetMetrics()
// Add decision-specific metrics
metrics := map[string]interface{}{
"enabled": true,
"dht_storage": dhtMetrics,
"last_updated": time.Now(),
}
return metrics, nil
}
// generateDecisionHash generates a hash for a decision to use in references
func (dp *DecisionPublisher) generateDecisionHash(decision *Decision) string {
// Create hash from key decision fields
hashData := fmt.Sprintf("%s_%s_%s_%d",
decision.Type,
decision.UCXLAddress,
decision.DiscussionID,
decision.Timestamp.Unix(),
)
hash := sha256.Sum256([]byte(hashData))
return fmt.Sprintf("decision_%x", hash[:8])
}
// IsEnabled returns whether decision publishing is enabled
func (dp *DecisionPublisher) IsEnabled() bool {
return dp.enabled
}
// Enable enables decision publishing
func (dp *DecisionPublisher) Enable() {
dp.enabled = true
log.Printf("📤 Decision publishing enabled")
}
// Disable disables decision publishing
func (dp *DecisionPublisher) Disable() {
dp.enabled = false
log.Printf("🚫 Decision publishing disabled")
}
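
For illustration, a caller of the publisher above might look like the sketch below. It assumes it sits in the same package (reusing its imports) and that a *dht.EncryptedDHTStorage has already been constructed elsewhere; the UCXL address literal and all field values are assumptions.

// Sketch: publishing one decision through the DecisionPublisher.
func publishExample(ctx context.Context, storage *dht.EncryptedDHTStorage) error {
	addr, err := ucxl.Parse("ucxl://alice:architect@chorus:refactor/*^") // address literal is illustrative
	if err != nil {
		return err
	}
	pub := NewDecisionPublisher(storage, true)
	dec := &Decision{
		Type:           "approval",
		Content:        "Adopt container-first configuration",
		Participants:   []string{"alice", "bob"},
		ConsensusLevel: 0.9,
		Timestamp:      time.Now(),
		DiscussionID:   "disc-42",
		Confidence:     0.8,
	}
	res, err := pub.PublishDecision(ctx, addr, dec)
	if err != nil {
		return err
	}
	log.Printf("published %s", res.DHTHash)
	return nil
}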


@@ -0,0 +1,327 @@
package integration
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"chorus.services/bzzz/pkg/config"
)
// SlurpClient handles HTTP communication with SLURP endpoints
type SlurpClient struct {
baseURL string
apiKey string
timeout time.Duration
retryCount int
retryDelay time.Duration
httpClient *http.Client
}
// SlurpEvent represents a SLURP event structure
type SlurpEvent struct {
EventType string `json:"event_type"`
Path string `json:"path"`
Content string `json:"content"`
Severity int `json:"severity"`
CreatedBy string `json:"created_by"`
Metadata map[string]interface{} `json:"metadata"`
Tags []string `json:"tags,omitempty"`
Timestamp time.Time `json:"timestamp"`
}
// EventResponse represents the response from SLURP API
type EventResponse struct {
Success bool `json:"success"`
EventID string `json:"event_id,omitempty"`
Message string `json:"message,omitempty"`
Error string `json:"error,omitempty"`
Timestamp time.Time `json:"timestamp"`
}
// BatchEventRequest represents a batch of events to be sent to SLURP
type BatchEventRequest struct {
Events []SlurpEvent `json:"events"`
Source string `json:"source"`
}
// BatchEventResponse represents the response for batch event creation
type BatchEventResponse struct {
Success bool `json:"success"`
ProcessedCount int `json:"processed_count"`
FailedCount int `json:"failed_count"`
EventIDs []string `json:"event_ids,omitempty"`
Errors []string `json:"errors,omitempty"`
Message string `json:"message,omitempty"`
Timestamp time.Time `json:"timestamp"`
}
// HealthResponse represents SLURP service health status
type HealthResponse struct {
Status string `json:"status"`
Version string `json:"version,omitempty"`
Uptime string `json:"uptime,omitempty"`
Timestamp time.Time `json:"timestamp"`
}
// NewSlurpClient creates a new SLURP API client
func NewSlurpClient(config config.SlurpConfig) *SlurpClient {
return &SlurpClient{
baseURL: strings.TrimSuffix(config.BaseURL, "/"),
apiKey: config.APIKey,
timeout: config.Timeout,
retryCount: config.RetryCount,
retryDelay: config.RetryDelay,
httpClient: &http.Client{
Timeout: config.Timeout,
},
}
}
// CreateEvent sends a single event to SLURP
func (c *SlurpClient) CreateEvent(ctx context.Context, event SlurpEvent) (*EventResponse, error) {
url := fmt.Sprintf("%s/api/events", c.baseURL)
eventData, err := json.Marshal(event)
if err != nil {
return nil, fmt.Errorf("failed to marshal event: %w", err)
}
var lastErr error
for attempt := 0; attempt <= c.retryCount; attempt++ {
if attempt > 0 {
select {
case <-ctx.Done():
return nil, ctx.Err()
case <-time.After(c.retryDelay):
}
}
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(eventData))
if err != nil {
lastErr = fmt.Errorf("failed to create request: %w", err)
continue
}
c.setHeaders(req)
resp, err := c.httpClient.Do(req)
if err != nil {
lastErr = fmt.Errorf("failed to send request: %w", err)
continue
}
body, readErr := io.ReadAll(resp.Body)
resp.Body.Close() // close inside the loop; a defer here would leak response bodies across retries
if c.isRetryableStatus(resp.StatusCode) && attempt < c.retryCount {
lastErr = fmt.Errorf("retryable error: HTTP %d", resp.StatusCode)
continue
}
if readErr != nil {
return nil, fmt.Errorf("failed to read response body: %w", readErr)
}
var eventResp EventResponse
if err := json.Unmarshal(body, &eventResp); err != nil {
return nil, fmt.Errorf("failed to unmarshal response: %w", err)
}
if resp.StatusCode >= 400 {
return &eventResp, fmt.Errorf("SLURP API error (HTTP %d): %s", resp.StatusCode, eventResp.Error)
}
return &eventResp, nil
}
return nil, fmt.Errorf("failed after %d attempts: %w", c.retryCount+1, lastErr)
}
// CreateEventsBatch sends multiple events to SLURP in a single request
func (c *SlurpClient) CreateEventsBatch(ctx context.Context, events []SlurpEvent) (*BatchEventResponse, error) {
url := fmt.Sprintf("%s/api/events/batch", c.baseURL)
batchRequest := BatchEventRequest{
Events: events,
Source: "bzzz-hmmm-integration",
}
batchData, err := json.Marshal(batchRequest)
if err != nil {
return nil, fmt.Errorf("failed to marshal batch request: %w", err)
}
var lastErr error
for attempt := 0; attempt <= c.retryCount; attempt++ {
if attempt > 0 {
select {
case <-ctx.Done():
return nil, ctx.Err()
case <-time.After(c.retryDelay):
}
}
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(batchData))
if err != nil {
lastErr = fmt.Errorf("failed to create batch request: %w", err)
continue
}
c.setHeaders(req)
resp, err := c.httpClient.Do(req)
if err != nil {
lastErr = fmt.Errorf("failed to send batch request: %w", err)
continue
}
body, readErr := io.ReadAll(resp.Body)
resp.Body.Close() // close inside the loop; a defer here would leak response bodies across retries
if c.isRetryableStatus(resp.StatusCode) && attempt < c.retryCount {
lastErr = fmt.Errorf("retryable error: HTTP %d", resp.StatusCode)
continue
}
if readErr != nil {
return nil, fmt.Errorf("failed to read batch response body: %w", readErr)
}
var batchResp BatchEventResponse
if err := json.Unmarshal(body, &batchResp); err != nil {
return nil, fmt.Errorf("failed to unmarshal batch response: %w", err)
}
if resp.StatusCode >= 400 {
return &batchResp, fmt.Errorf("SLURP batch API error (HTTP %d): %s", resp.StatusCode, batchResp.Message)
}
return &batchResp, nil
}
return nil, fmt.Errorf("batch failed after %d attempts: %w", c.retryCount+1, lastErr)
}
// GetHealth checks SLURP service health
func (c *SlurpClient) GetHealth(ctx context.Context) (*HealthResponse, error) {
url := fmt.Sprintf("%s/api/health", c.baseURL)
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return nil, fmt.Errorf("failed to create health request: %w", err)
}
c.setHeaders(req)
resp, err := c.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send health request: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to read health response: %w", err)
}
var healthResp HealthResponse
if err := json.Unmarshal(body, &healthResp); err != nil {
return nil, fmt.Errorf("failed to unmarshal health response: %w", err)
}
if resp.StatusCode >= 400 {
return &healthResp, fmt.Errorf("SLURP health check failed (HTTP %d)", resp.StatusCode)
}
return &healthResp, nil
}
// QueryEvents retrieves events from SLURP based on filters
func (c *SlurpClient) QueryEvents(ctx context.Context, filters map[string]string) ([]SlurpEvent, error) {
baseURL := fmt.Sprintf("%s/api/events", c.baseURL)
// Build query parameters
params := url.Values{}
for key, value := range filters {
params.Add(key, value)
}
queryURL := baseURL
if len(params) > 0 {
queryURL = fmt.Sprintf("%s?%s", baseURL, params.Encode())
}
req, err := http.NewRequestWithContext(ctx, "GET", queryURL, nil)
if err != nil {
return nil, fmt.Errorf("failed to create query request: %w", err)
}
c.setHeaders(req)
resp, err := c.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send query request: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to read query response: %w", err)
}
var events []SlurpEvent
if err := json.Unmarshal(body, &events); err != nil {
return nil, fmt.Errorf("failed to unmarshal events: %w", err)
}
if resp.StatusCode >= 400 {
return nil, fmt.Errorf("SLURP query failed (HTTP %d)", resp.StatusCode)
}
return events, nil
}
// setHeaders sets common HTTP headers for SLURP API requests
func (c *SlurpClient) setHeaders(req *http.Request) {
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Accept", "application/json")
req.Header.Set("User-Agent", "Bzzz-HMMM-Integration/1.0")
if c.apiKey != "" {
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", c.apiKey))
}
}
// isRetryableStatus determines if an HTTP status code is retryable
func (c *SlurpClient) isRetryableStatus(statusCode int) bool {
switch statusCode {
case http.StatusTooManyRequests, // 429
http.StatusInternalServerError, // 500
http.StatusBadGateway, // 502
http.StatusServiceUnavailable, // 503
http.StatusGatewayTimeout: // 504
return true
default:
return false
}
}
// Close cleans up the client resources
func (c *SlurpClient) Close() error {
// HTTP client doesn't need explicit cleanup, but we can implement
// connection pooling cleanup if needed in the future
return nil
}
// ValidateConnection tests the connection to SLURP
func (c *SlurpClient) ValidateConnection(ctx context.Context) error {
_, err := c.GetHealth(ctx)
return err
}
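
For illustration, a minimal caller of the client above (assuming the same package and imports; the config values and event fields are assumptions):

// Sketch: sending one immediate event through SlurpClient.
func sendExample(ctx context.Context, cfg config.SlurpConfig) error {
	client := NewSlurpClient(cfg)
	defer client.Close()
	if err := client.ValidateConnection(ctx); err != nil {
		return fmt.Errorf("SLURP unreachable: %w", err)
	}
	resp, err := client.CreateEvent(ctx, SlurpEvent{
		EventType: "announcement",
		Path:      "chorus/docs",
		Content:   "containerized build is green",
		Severity:  3,
		CreatedBy: "hmmm",
		Timestamp: time.Now(),
	})
	if err != nil {
		return err // retries and backoff have already been applied internally
	}
	fmt.Printf("event accepted: %s\n", resp.EventID)
	return nil
}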


@@ -0,0 +1,776 @@
package integration
import (
"context"
"fmt"
"log"
"math"
"regexp"
"strings"
"sync"
"time"
"chorus.services/bzzz/pkg/config"
"chorus.services/bzzz/pkg/ucxl"
"chorus.services/bzzz/pubsub"
)
// SlurpEventIntegrator manages the integration between HMMM discussions and SLURP events
type SlurpEventIntegrator struct {
config config.SlurpConfig
client *SlurpClient
pubsub *pubsub.PubSub
eventMapping config.HmmmToSlurpMapping
decisionPublisher *DecisionPublisher
// Batch processing
eventBatch []SlurpEvent
batchMutex sync.Mutex
batchTimer *time.Timer
// Context and lifecycle
ctx context.Context
cancel context.CancelFunc
// Statistics
stats SlurpIntegrationStats
statsMutex sync.RWMutex
}
// SlurpIntegrationStats tracks integration performance metrics
type SlurpIntegrationStats struct {
EventsGenerated int64 `json:"events_generated"`
EventsSuccessful int64 `json:"events_successful"`
EventsFailed int64 `json:"events_failed"`
BatchesSent int64 `json:"batches_sent"`
LastEventTime time.Time `json:"last_event_time"`
LastSuccessTime time.Time `json:"last_success_time"`
LastFailureTime time.Time `json:"last_failure_time"`
LastFailureError string `json:"last_failure_error"`
AverageResponseTime float64 `json:"average_response_time_ms"`
}
// HmmmDiscussionContext represents a HMMM discussion that can generate SLURP events
type HmmmDiscussionContext struct {
DiscussionID string `json:"discussion_id"`
SessionID string `json:"session_id,omitempty"`
Participants []string `json:"participants"`
StartTime time.Time `json:"start_time"`
EndTime time.Time `json:"end_time"`
Messages []HmmmMessage `json:"messages"`
ConsensusReached bool `json:"consensus_reached"`
ConsensusStrength float64 `json:"consensus_strength"`
OutcomeType string `json:"outcome_type"`
ProjectPath string `json:"project_path"`
RelatedTasks []string `json:"related_tasks,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
}
// HmmmMessage represents a message in a HMMM discussion
type HmmmMessage struct {
From string `json:"from"`
Content string `json:"content"`
Type string `json:"type"`
Timestamp time.Time `json:"timestamp"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
}
// NewSlurpEventIntegrator creates a new SLURP event integrator
func NewSlurpEventIntegrator(ctx context.Context, slurpConfig config.SlurpConfig, ps *pubsub.PubSub, decisionPublisher *DecisionPublisher) (*SlurpEventIntegrator, error) {
if !slurpConfig.Enabled {
return nil, fmt.Errorf("SLURP integration is disabled in configuration")
}
client := NewSlurpClient(slurpConfig)
// Test connection to SLURP
if err := client.ValidateConnection(ctx); err != nil {
return nil, fmt.Errorf("failed to connect to SLURP: %w", err)
}
integrationCtx, cancel := context.WithCancel(ctx)
integrator := &SlurpEventIntegrator{
config: slurpConfig,
client: client,
pubsub: ps,
eventMapping: config.GetHmmmToSlurpMapping(),
decisionPublisher: decisionPublisher,
eventBatch: make([]SlurpEvent, 0, slurpConfig.BatchProcessing.MaxBatchSize),
ctx: integrationCtx,
cancel: cancel,
stats: SlurpIntegrationStats{},
}
// Initialize batch processing if enabled
if slurpConfig.BatchProcessing.Enabled {
integrator.initBatchProcessing()
}
fmt.Printf("🎯 SLURP Event Integrator initialized for %s\n", slurpConfig.BaseURL)
return integrator, nil
}
// ProcessHmmmDiscussion analyzes a HMMM discussion and generates appropriate SLURP events
func (s *SlurpEventIntegrator) ProcessHmmmDiscussion(ctx context.Context, discussion HmmmDiscussionContext) error {
s.statsMutex.Lock()
s.stats.EventsGenerated++
s.stats.LastEventTime = time.Now()
s.statsMutex.Unlock()
// Validate discussion meets generation criteria
if !s.shouldGenerateEvent(discussion) {
fmt.Printf("📊 Discussion %s does not meet event generation criteria\n", discussion.DiscussionID)
return nil
}
// Determine event type from discussion
eventType, confidence := s.determineEventType(discussion)
if eventType == "" {
fmt.Printf("📊 Could not determine event type for discussion %s\n", discussion.DiscussionID)
return nil
}
// Calculate severity
severity := s.calculateSeverity(discussion, eventType)
// Generate event content
content := s.generateEventContent(discussion)
// Generate UCXL address for this discussion
ucxlAddr, err := s.generateUCXLAddress(discussion)
if err != nil {
fmt.Printf("⚠️ Failed to generate UCXL address: %v", err)
// Continue without UCXL address if generation fails
}
// Create SLURP event with UCXL enrichment
slurpEvent := SlurpEvent{
EventType: eventType,
Path: discussion.ProjectPath,
Content: content,
Severity: severity,
CreatedBy: s.config.DefaultEventSettings.DefaultCreatedBy,
Timestamp: time.Now(),
Tags: append(s.config.DefaultEventSettings.DefaultTags, fmt.Sprintf("confidence-%.2f", confidence)),
Metadata: map[string]interface{}{
"discussion_id": discussion.DiscussionID,
"session_id": discussion.SessionID,
"participants": discussion.Participants,
"consensus_strength": discussion.ConsensusStrength,
"discussion_duration": discussion.EndTime.Sub(discussion.StartTime).String(),
"message_count": len(discussion.Messages),
"outcome_type": discussion.OutcomeType,
"generation_confidence": confidence,
},
}
// Add UCXL address components if successfully generated
if ucxlAddr != nil {
slurpEvent.Metadata["ucxl_reference"] = ucxlAddr.String()
slurpEvent.Metadata["ucxl_agent"] = ucxlAddr.Agent
slurpEvent.Metadata["ucxl_role"] = ucxlAddr.Role
slurpEvent.Metadata["ucxl_project"] = ucxlAddr.Project
slurpEvent.Metadata["ucxl_task"] = ucxlAddr.Task
slurpEvent.Metadata["ucxl_temporal"] = ucxlAddr.TemporalSegment.String()
if ucxlAddr.Path != "" {
slurpEvent.Metadata["ucxl_path"] = ucxlAddr.Path
}
}
// Add custom metadata from template
for key, value := range s.config.DefaultEventSettings.MetadataTemplate {
slurpEvent.Metadata[key] = value
}
// Add discussion-specific metadata
for key, value := range discussion.Metadata {
slurpEvent.Metadata[key] = value
}
// Publish decision to DHT if UCXL address was successfully generated and decision publisher is available
if ucxlAddr != nil && s.decisionPublisher != nil && s.decisionPublisher.IsEnabled() {
if s.shouldPublishDecision(eventType) {
decision := s.createDecisionFromDiscussion(discussion, eventType, confidence)
publishResult, err := s.decisionPublisher.PublishDecision(ctx, ucxlAddr, decision)
if err != nil {
log.Printf("⚠️ Failed to publish decision to DHT: %v", err)
} else if publishResult.Success {
// Add DHT reference to event metadata
slurpEvent.Metadata["decision_dht_hash"] = publishResult.DHTHash
slurpEvent.Metadata["decision_published"] = true
slurpEvent.Metadata["decision_published_at"] = publishResult.PublishedAt
log.Printf("📤 Decision published to DHT: %s", publishResult.DHTHash[:16]+"...")
}
}
}
// Send event (batch or immediate)
if s.config.BatchProcessing.Enabled {
return s.addToBatch(slurpEvent)
} else {
return s.sendImmediateEvent(ctx, slurpEvent, discussion.DiscussionID)
}
}
// shouldGenerateEvent determines if a discussion meets the criteria for event generation
func (s *SlurpEventIntegrator) shouldGenerateEvent(discussion HmmmDiscussionContext) bool {
// Check minimum participants
if len(discussion.Participants) < s.config.EventGeneration.MinParticipants {
return false
}
// Check consensus strength
if discussion.ConsensusStrength < s.config.EventGeneration.MinConsensusStrength {
return false
}
// Check discussion duration
duration := discussion.EndTime.Sub(discussion.StartTime)
if duration < s.config.EventGeneration.MinDiscussionDuration {
return false
}
if duration > s.config.EventGeneration.MaxDiscussionDuration {
return false // Too long, might indicate stalled discussion
}
// Check if unanimity is required and achieved
if s.config.EventGeneration.RequireUnanimity && discussion.ConsensusStrength < 1.0 {
return false
}
return true
}
// determineEventType analyzes discussion content to determine SLURP event type
func (s *SlurpEventIntegrator) determineEventType(discussion HmmmDiscussionContext) (string, float64) {
// Combine all message content for analysis
var allContent strings.Builder
for _, msg := range discussion.Messages {
allContent.WriteString(strings.ToLower(msg.Content))
allContent.WriteString(" ")
}
content := allContent.String()
// Score each event type based on keyword matches
scores := make(map[string]float64)
scores["approval"] = s.scoreKeywordMatch(content, s.eventMapping.ApprovalKeywords)
scores["warning"] = s.scoreKeywordMatch(content, s.eventMapping.WarningKeywords)
scores["blocker"] = s.scoreKeywordMatch(content, s.eventMapping.BlockerKeywords)
scores["priority_change"] = s.scoreKeywordMatch(content, s.eventMapping.PriorityKeywords)
scores["access_update"] = s.scoreKeywordMatch(content, s.eventMapping.AccessKeywords)
scores["structural_change"] = s.scoreKeywordMatch(content, s.eventMapping.StructuralKeywords)
scores["announcement"] = s.scoreKeywordMatch(content, s.eventMapping.AnnouncementKeywords)
// Find highest scoring event type
var bestType string
var bestScore float64
for eventType, score := range scores {
if score > bestScore {
bestType = eventType
bestScore = score
}
}
// Require minimum confidence threshold
minConfidence := 0.3
if bestScore < minConfidence {
return "", 0
}
// Check if event type is enabled
if s.isEventTypeDisabled(bestType) {
return "", 0
}
return bestType, bestScore
}
// scoreKeywordMatch calculates a score based on keyword frequency
func (s *SlurpEventIntegrator) scoreKeywordMatch(content string, keywords []string) float64 {
if len(keywords) == 0 {
return 0
}
matches := 0
for _, keyword := range keywords {
if strings.Contains(content, strings.ToLower(keyword)) {
matches++
}
}
return float64(matches) / float64(len(keywords))
}
// isEventTypeDisabled checks if an event type is disabled in configuration
func (s *SlurpEventIntegrator) isEventTypeDisabled(eventType string) bool {
for _, disabled := range s.config.EventGeneration.DisabledEventTypes {
if disabled == eventType {
return true
}
}
// Check if it's in enabled list (if specified)
if len(s.config.EventGeneration.EnabledEventTypes) > 0 {
for _, enabled := range s.config.EventGeneration.EnabledEventTypes {
if enabled == eventType {
return false
}
}
return true // Not in enabled list
}
return false
}
// calculateSeverity determines event severity based on discussion characteristics
func (s *SlurpEventIntegrator) calculateSeverity(discussion HmmmDiscussionContext, eventType string) int {
// Start with base severity for event type
baseSeverity := s.config.EventGeneration.SeverityRules.BaseSeverity[eventType]
if baseSeverity == 0 {
baseSeverity = s.config.DefaultEventSettings.DefaultSeverity
}
severity := float64(baseSeverity)
// Apply participant multiplier
participantBoost := float64(len(discussion.Participants)-1) * s.config.EventGeneration.SeverityRules.ParticipantMultiplier
severity += participantBoost
// Apply duration multiplier
durationHours := discussion.EndTime.Sub(discussion.StartTime).Hours()
durationBoost := durationHours * s.config.EventGeneration.SeverityRules.DurationMultiplier
severity += durationBoost
// Check for urgency keywords
allContent := strings.ToLower(s.generateEventContent(discussion))
for _, keyword := range s.config.EventGeneration.SeverityRules.UrgencyKeywords {
if strings.Contains(allContent, strings.ToLower(keyword)) {
severity += float64(s.config.EventGeneration.SeverityRules.UrgencyBoost)
break // Only apply once
}
}
// Apply bounds
finalSeverity := int(math.Round(severity))
if finalSeverity < s.config.EventGeneration.SeverityRules.MinSeverity {
finalSeverity = s.config.EventGeneration.SeverityRules.MinSeverity
}
if finalSeverity > s.config.EventGeneration.SeverityRules.MaxSeverity {
finalSeverity = s.config.EventGeneration.SeverityRules.MaxSeverity
}
return finalSeverity
}
// generateEventContent creates human-readable content for the SLURP event
func (s *SlurpEventIntegrator) generateEventContent(discussion HmmmDiscussionContext) string {
if discussion.OutcomeType != "" {
return fmt.Sprintf("HMMM discussion reached consensus: %s (%d participants, %.1f%% agreement)",
discussion.OutcomeType,
len(discussion.Participants),
discussion.ConsensusStrength*100)
}
return fmt.Sprintf("HMMM discussion completed with %d participants over %v",
len(discussion.Participants),
discussion.EndTime.Sub(discussion.StartTime).Round(time.Minute))
}
// addToBatch adds an event to the batch for later processing
func (s *SlurpEventIntegrator) addToBatch(event SlurpEvent) error {
s.batchMutex.Lock()
defer s.batchMutex.Unlock()
s.eventBatch = append(s.eventBatch, event)
// Check if batch is full
if len(s.eventBatch) >= s.config.BatchProcessing.MaxBatchSize {
return s.flushBatch()
}
// Reset batch timer
if s.batchTimer != nil {
s.batchTimer.Stop()
}
s.batchTimer = time.AfterFunc(s.config.BatchProcessing.MaxBatchWait, func() {
s.batchMutex.Lock()
defer s.batchMutex.Unlock()
s.flushBatch()
})
fmt.Printf("📦 Added event to batch (%d/%d)\n", len(s.eventBatch), s.config.BatchProcessing.MaxBatchSize)
return nil
}
// flushBatch sends all batched events to SLURP
func (s *SlurpEventIntegrator) flushBatch() error {
if len(s.eventBatch) == 0 {
return nil
}
events := make([]SlurpEvent, len(s.eventBatch))
copy(events, s.eventBatch)
s.eventBatch = s.eventBatch[:0] // Clear batch
if s.batchTimer != nil {
s.batchTimer.Stop()
s.batchTimer = nil
}
fmt.Printf("🚀 Flushing batch of %d events to SLURP\n", len(events))
start := time.Now()
resp, err := s.client.CreateEventsBatch(s.ctx, events)
duration := time.Since(start)
s.statsMutex.Lock()
s.stats.BatchesSent++
s.stats.AverageResponseTime = (s.stats.AverageResponseTime + duration.Seconds()*1000) / 2
if err != nil {
s.stats.EventsFailed += int64(len(events))
s.stats.LastFailureTime = time.Now()
s.stats.LastFailureError = err.Error()
s.statsMutex.Unlock()
// Publish failure notification
s.publishSlurpEvent("slurp_batch_failed", map[string]interface{}{
"error": err.Error(),
"event_count": len(events),
"batch_id": fmt.Sprintf("batch_%d", time.Now().Unix()),
})
return fmt.Errorf("failed to send batch: %w", err)
}
s.stats.EventsSuccessful += int64(resp.ProcessedCount)
s.stats.EventsFailed += int64(resp.FailedCount)
s.stats.LastSuccessTime = time.Now()
s.statsMutex.Unlock()
// Publish success notification
s.publishSlurpEvent("slurp_batch_success", map[string]interface{}{
"processed_count": resp.ProcessedCount,
"failed_count": resp.FailedCount,
"event_ids": resp.EventIDs,
"batch_id": fmt.Sprintf("batch_%d", time.Now().Unix()),
})
fmt.Printf("✅ Batch processed: %d succeeded, %d failed\n", resp.ProcessedCount, resp.FailedCount)
return nil
}
// sendImmediateEvent sends a single event immediately to SLURP
func (s *SlurpEventIntegrator) sendImmediateEvent(ctx context.Context, event SlurpEvent, discussionID string) error {
start := time.Now()
resp, err := s.client.CreateEvent(ctx, event)
duration := time.Since(start)
s.statsMutex.Lock()
s.stats.AverageResponseTime = (s.stats.AverageResponseTime + duration.Seconds()*1000) / 2
if err != nil {
s.stats.EventsFailed++
s.stats.LastFailureTime = time.Now()
s.stats.LastFailureError = err.Error()
s.statsMutex.Unlock()
// Publish failure notification
s.publishSlurpEvent("slurp_event_failed", map[string]interface{}{
"discussion_id": discussionID,
"event_type": event.EventType,
"error": err.Error(),
})
return fmt.Errorf("failed to send event: %w", err)
}
s.stats.EventsSuccessful++
s.stats.LastSuccessTime = time.Now()
s.statsMutex.Unlock()
// Publish success notification
s.publishSlurpEvent("slurp_event_success", map[string]interface{}{
"discussion_id": discussionID,
"event_type": event.EventType,
"event_id": resp.EventID,
"severity": event.Severity,
})
fmt.Printf("✅ SLURP event created: %s (ID: %s)\n", event.EventType, resp.EventID)
return nil
}
// publishSlurpEvent publishes a SLURP integration event to the pubsub system
func (s *SlurpEventIntegrator) publishSlurpEvent(eventType string, data map[string]interface{}) {
var msgType pubsub.MessageType
switch eventType {
case "slurp_event_success", "slurp_batch_success":
msgType = pubsub.SlurpEventGenerated
case "slurp_event_failed", "slurp_batch_failed":
msgType = pubsub.SlurpEventAck
default:
msgType = pubsub.SlurpContextUpdate
}
data["timestamp"] = time.Now()
data["integration_source"] = "hmmm-slurp-integrator"
if err := s.pubsub.PublishHmmmMessage(msgType, data); err != nil {
fmt.Printf("❌ Failed to publish SLURP integration event: %v\n", err)
}
}
// initBatchProcessing initializes batch processing components
func (s *SlurpEventIntegrator) initBatchProcessing() {
fmt.Printf("📦 Batch processing enabled: max_size=%d, max_wait=%v\n",
s.config.BatchProcessing.MaxBatchSize,
s.config.BatchProcessing.MaxBatchWait)
}
// GetStats returns current integration statistics
func (s *SlurpEventIntegrator) GetStats() SlurpIntegrationStats {
s.statsMutex.RLock()
defer s.statsMutex.RUnlock()
return s.stats
}
// Close shuts down the integrator and flushes any pending events
func (s *SlurpEventIntegrator) Close() error {
s.cancel()
// Flush any remaining batched events
if s.config.BatchProcessing.Enabled && s.config.BatchProcessing.FlushOnShutdown {
s.batchMutex.Lock()
if len(s.eventBatch) > 0 {
fmt.Printf("🧹 Flushing %d remaining events on shutdown\n", len(s.eventBatch))
s.flushBatch()
}
s.batchMutex.Unlock()
}
if s.batchTimer != nil {
s.batchTimer.Stop()
}
return s.client.Close()
}
// generateUCXLAddress creates a UCXL address from HMMM discussion context
func (s *SlurpEventIntegrator) generateUCXLAddress(discussion HmmmDiscussionContext) (*ucxl.Address, error) {
// Extract components from discussion
agent := s.extractAgentFromParticipants(discussion.Participants)
role := s.extractRoleFromDiscussion(discussion)
project := s.extractProjectFromPath(discussion.ProjectPath)
task := s.extractTaskFromDiscussion(discussion)
// Use latest temporal segment by default
temporalSegment := "*^"
// Build UCXL address string
addressStr := fmt.Sprintf("ucxl://%s:%s@%s:%s/%s",
agent, role, project, task, temporalSegment)
// Add path if available
if discussion.ProjectPath != "" {
// Extract relative path for UCXL
relativePath := s.extractRelativePath(discussion.ProjectPath)
if relativePath != "" {
addressStr += "/" + relativePath
}
}
// Parse and validate the address
return ucxl.Parse(addressStr)
}
// extractAgentFromParticipants determines the primary agent from participants
func (s *SlurpEventIntegrator) extractAgentFromParticipants(participants []string) string {
if len(participants) == 0 {
return "any"
}
// Use the first participant as the primary agent, or "consensus" for multiple
if len(participants) == 1 {
return s.normalizeIdentifier(participants[0])
}
return "consensus"
}
// extractRoleFromDiscussion determines the role from discussion context
func (s *SlurpEventIntegrator) extractRoleFromDiscussion(discussion HmmmDiscussionContext) string {
// Look for role hints in metadata
if discussion.Metadata != nil {
if role, exists := discussion.Metadata["primary_role"]; exists {
if roleStr, ok := role.(string); ok {
return s.normalizeIdentifier(roleStr)
}
}
// Check for role-specific keywords in outcome type
switch discussion.OutcomeType {
case "architecture_decision":
return "architect"
case "security_review":
return "security"
case "code_review":
return "developer"
case "deployment_decision":
return "ops"
default:
return "contributor"
}
}
return "contributor"
}
// extractProjectFromPath extracts project name from project path
func (s *SlurpEventIntegrator) extractProjectFromPath(projectPath string) string {
if projectPath == "" {
return "unknown"
}
// Split path and take the first segment as project
parts := strings.Split(strings.Trim(projectPath, "/"), "/")
if len(parts) > 0 && parts[0] != "" {
return s.normalizeIdentifier(parts[0])
}
return "unknown"
}
// extractTaskFromDiscussion determines task from discussion context
func (s *SlurpEventIntegrator) extractTaskFromDiscussion(discussion HmmmDiscussionContext) string {
// First check for explicit task in related tasks
if len(discussion.RelatedTasks) > 0 {
return s.normalizeIdentifier(discussion.RelatedTasks[0])
}
// Check metadata for task information
if discussion.Metadata != nil {
if task, exists := discussion.Metadata["task_id"]; exists {
if taskStr, ok := task.(string); ok {
return s.normalizeIdentifier(taskStr)
}
}
if feature, exists := discussion.Metadata["feature"]; exists {
if featureStr, ok := feature.(string); ok {
return s.normalizeIdentifier(featureStr)
}
}
}
// Fall back to discussion ID as task identifier
if discussion.DiscussionID != "" {
return s.normalizeIdentifier("discussion-" + discussion.DiscussionID)
}
return "general"
}
// extractRelativePath extracts relative path from project path for UCXL
func (s *SlurpEventIntegrator) extractRelativePath(projectPath string) string {
if projectPath == "" {
return ""
}
// Remove leading slash and split
trimmed := strings.Trim(projectPath, "/")
parts := strings.Split(trimmed, "/")
// If we have more than just the project name, join the rest as relative path
if len(parts) > 1 {
return strings.Join(parts[1:], "/")
}
return ""
}
// normalizeIdentifier normalizes identifiers for UCXL compliance
func (s *SlurpEventIntegrator) normalizeIdentifier(identifier string) string {
if identifier == "" {
return "unknown"
}
// Convert to lowercase and replace invalid characters with underscores
normalized := strings.ToLower(identifier)
normalized = regexp.MustCompile(`[^a-zA-Z0-9_\-]`).ReplaceAllString(normalized, "_")
// Ensure it doesn't start with a number or special character
if !regexp.MustCompile(`^[a-zA-Z_]`).MatchString(normalized) {
normalized = "id_" + normalized
}
// Truncate if too long (UCXL components should be reasonable length)
if len(normalized) > 50 {
normalized = normalized[:50]
}
return normalized
}
// shouldPublishDecision determines if an event type warrants decision publication
func (s *SlurpEventIntegrator) shouldPublishDecision(eventType string) bool {
// Only publish decisions for conclusive outcomes
decisiveEventTypes := []string{
"approval",
"blocker",
"structural_change",
"priority_change",
"access_update",
}
for _, decisive := range decisiveEventTypes {
if eventType == decisive {
return true
}
}
return false
}
// createDecisionFromDiscussion creates a Decision object from HMMM discussion context
func (s *SlurpEventIntegrator) createDecisionFromDiscussion(discussion HmmmDiscussionContext, eventType string, confidence float64) *Decision {
decision := &Decision{
Type: eventType,
Content: s.generateEventContent(discussion),
Participants: discussion.Participants,
ConsensusLevel: discussion.ConsensusStrength,
Timestamp: time.Now(),
DiscussionID: discussion.DiscussionID,
Confidence: confidence,
Tags: []string{"hmmm-generated", "consensus-based", eventType},
Metadata: map[string]interface{}{
"session_id": discussion.SessionID,
"discussion_duration": discussion.EndTime.Sub(discussion.StartTime).String(),
"message_count": len(discussion.Messages),
"outcome_type": discussion.OutcomeType,
"project_path": discussion.ProjectPath,
"related_tasks": discussion.RelatedTasks,
"generation_source": "slurp-event-integrator",
"generation_timestamp": time.Now(),
},
}
// Add discussion metadata to decision metadata
if discussion.Metadata != nil {
for key, value := range discussion.Metadata {
decision.Metadata["discussion_"+key] = value
}
}
// Set expiration for temporary decisions (warnings, announcements)
if eventType == "warning" || eventType == "announcement" {
expiration := time.Now().Add(30 * 24 * time.Hour) // 30 days
decision.ExpiresAt = &expiration
}
return decision
}
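
For illustration, a minimal end-to-end call into the integrator above (same package assumed; every field value here is an assumption):

// Sketch: feeding a finished HMMM discussion into the integrator.
func integrateExample(ctx context.Context, integ *SlurpEventIntegrator) error {
	return integ.ProcessHmmmDiscussion(ctx, HmmmDiscussionContext{
		DiscussionID:      "disc-42",
		Participants:      []string{"alice", "bob", "carol"},
		StartTime:         time.Now().Add(-20 * time.Minute),
		EndTime:           time.Now(),
		ConsensusReached:  true,
		ConsensusStrength: 0.92,
		OutcomeType:       "architecture_decision",
		ProjectPath:       "chorus/pkg/dht",
	})
}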


@@ -0,0 +1,474 @@
package integration
import (
"crypto/sha256"
"encoding/json"
"fmt"
"log"
"math"
"math/rand"
"os"
"path/filepath"
"sync"
"time"
)
// CircuitState represents the state of a circuit breaker
type CircuitState int
const (
CircuitClosed CircuitState = iota
CircuitOpen
CircuitHalfOpen
)
// String returns string representation of circuit state
func (s CircuitState) String() string {
switch s {
case CircuitClosed:
return "CLOSED"
case CircuitOpen:
return "OPEN"
case CircuitHalfOpen:
return "HALF_OPEN"
default:
return "UNKNOWN"
}
}
// CircuitBreaker implements circuit breaker pattern for SLURP client
type CircuitBreaker struct {
mu sync.RWMutex
state CircuitState
failureCount int
consecutiveFailures int
lastFailureTime time.Time
nextRetryTime time.Time
// Configuration
maxFailures int // Max failures before opening circuit
cooldownPeriod time.Duration // How long to stay open
halfOpenTimeout time.Duration // How long to wait in half-open before closing
// Metrics
totalRequests int64
successfulRequests int64
failedRequests int64
}
// NewCircuitBreaker creates a new circuit breaker
func NewCircuitBreaker(maxFailures int, cooldownPeriod, halfOpenTimeout time.Duration) *CircuitBreaker {
return &CircuitBreaker{
state: CircuitClosed,
maxFailures: maxFailures,
cooldownPeriod: cooldownPeriod,
halfOpenTimeout: halfOpenTimeout,
}
}
// CanProceed checks if request can proceed through circuit breaker
func (cb *CircuitBreaker) CanProceed() bool {
cb.mu.Lock()
defer cb.mu.Unlock()
cb.totalRequests++
switch cb.state {
case CircuitClosed:
return true
case CircuitOpen:
if time.Now().After(cb.nextRetryTime) {
cb.state = CircuitHalfOpen
log.Printf("🔄 Circuit breaker moving to HALF_OPEN state")
return true
}
return false
case CircuitHalfOpen:
return true
default:
return false
}
}
// RecordSuccess records a successful operation
func (cb *CircuitBreaker) RecordSuccess() {
cb.mu.Lock()
defer cb.mu.Unlock()
cb.successfulRequests++
cb.failureCount = 0
cb.consecutiveFailures = 0
if cb.state == CircuitHalfOpen {
cb.state = CircuitClosed
log.Printf("✅ Circuit breaker closed after successful operation")
}
}
// RecordFailure records a failed operation
func (cb *CircuitBreaker) RecordFailure() {
cb.mu.Lock()
defer cb.mu.Unlock()
cb.failedRequests++
cb.failureCount++
cb.consecutiveFailures++
cb.lastFailureTime = time.Now()
if cb.failureCount >= cb.maxFailures && cb.state == CircuitClosed {
cb.state = CircuitOpen
cb.nextRetryTime = time.Now().Add(cb.cooldownPeriod)
log.Printf("🚫 Circuit breaker opened due to %d consecutive failures", cb.consecutiveFailures)
}
}
// GetStats returns circuit breaker statistics
func (cb *CircuitBreaker) GetStats() map[string]interface{} {
cb.mu.RLock()
defer cb.mu.RUnlock()
return map[string]interface{}{
"state": cb.state.String(),
"total_requests": cb.totalRequests,
"successful_requests": cb.successfulRequests,
"failed_requests": cb.failedRequests,
"current_failures": cb.failureCount,
"consecutive_failures": cb.consecutiveFailures,
"last_failure_time": cb.lastFailureTime,
"next_retry_time": cb.nextRetryTime,
}
}
// IdempotencyManager handles idempotency key generation and tracking
type IdempotencyManager struct {
keys map[string]time.Time
mu sync.RWMutex
maxAge time.Duration
}
// NewIdempotencyManager creates a new idempotency manager
func NewIdempotencyManager(maxAge time.Duration) *IdempotencyManager {
im := &IdempotencyManager{
keys: make(map[string]time.Time),
maxAge: maxAge,
}
// Start cleanup goroutine
go im.cleanupExpiredKeys()
return im
}
// GenerateKey generates a stable idempotency key for an event
func (im *IdempotencyManager) GenerateKey(discussionID, eventType string, timestamp time.Time) string {
// Create 5-minute time buckets to handle slight timing differences
bucket := timestamp.Truncate(5 * time.Minute)
// Generate stable hash
data := fmt.Sprintf("%s_%s_%d", discussionID, eventType, bucket.Unix())
hash := sha256.Sum256([]byte(data))
return fmt.Sprintf("hmmm_%x", hash[:8]) // Use first 8 bytes for shorter key
}
// IsProcessed checks if an idempotency key has been processed recently
func (im *IdempotencyManager) IsProcessed(key string) bool {
im.mu.RLock()
defer im.mu.RUnlock()
processTime, exists := im.keys[key]
if !exists {
return false
}
// Check if key is still valid (not expired)
return time.Since(processTime) <= im.maxAge
}
// MarkProcessed marks an idempotency key as processed
func (im *IdempotencyManager) MarkProcessed(key string) {
im.mu.Lock()
defer im.mu.Unlock()
im.keys[key] = time.Now()
}
// cleanupExpiredKeys periodically removes expired idempotency keys
func (im *IdempotencyManager) cleanupExpiredKeys() {
ticker := time.NewTicker(im.maxAge / 2) // Cleanup twice as often as expiry
defer ticker.Stop()
for range ticker.C {
im.mu.Lock()
now := time.Now()
expired := make([]string, 0)
for key, processTime := range im.keys {
if now.Sub(processTime) > im.maxAge {
expired = append(expired, key)
}
}
for _, key := range expired {
delete(im.keys, key)
}
if len(expired) > 0 {
log.Printf("🧹 Cleaned up %d expired idempotency keys", len(expired))
}
im.mu.Unlock()
}
}
// DeadLetterQueue handles failed events that need to be retried later
type DeadLetterQueue struct {
queueDir string
mu sync.RWMutex
items map[string]*DLQItem
maxRetries int
}
// DLQItem represents an item in the dead letter queue
type DLQItem struct {
Event SlurpEvent `json:"event"`
FailureReason string `json:"failure_reason"`
RetryCount int `json:"retry_count"`
NextRetryTime time.Time `json:"next_retry_time"`
FirstFailed time.Time `json:"first_failed"`
LastFailed time.Time `json:"last_failed"`
}
// NewDeadLetterQueue creates a new dead letter queue
func NewDeadLetterQueue(queueDir string, maxRetries int) (*DeadLetterQueue, error) {
if err := os.MkdirAll(queueDir, 0755); err != nil {
return nil, fmt.Errorf("failed to create queue directory: %w", err)
}
dlq := &DeadLetterQueue{
queueDir: queueDir,
items: make(map[string]*DLQItem),
maxRetries: maxRetries,
}
// Load existing items from disk
if err := dlq.loadFromDisk(); err != nil {
log.Printf("⚠️ Failed to load DLQ from disk: %v", err)
}
return dlq, nil
}
// Enqueue adds a failed event to the dead letter queue
func (dlq *DeadLetterQueue) Enqueue(event SlurpEvent, reason string) error {
dlq.mu.Lock()
defer dlq.mu.Unlock()
eventID := dlq.generateEventID(event)
now := time.Now()
// Check if event already exists in DLQ
if existing, exists := dlq.items[eventID]; exists {
existing.RetryCount++
existing.FailureReason = reason
existing.LastFailed = now
existing.NextRetryTime = dlq.calculateNextRetry(existing.RetryCount)
log.Printf("💀 Updated DLQ item %s (retry %d/%d)", eventID, existing.RetryCount, dlq.maxRetries)
} else {
// Create new DLQ item
item := &DLQItem{
Event: event,
FailureReason: reason,
RetryCount: 1,
NextRetryTime: dlq.calculateNextRetry(1),
FirstFailed: now,
LastFailed: now,
}
dlq.items[eventID] = item
log.Printf("💀 Added new item to DLQ: %s", eventID)
}
// Persist to disk
return dlq.saveToDisk()
}
// GetReadyItems returns items that are ready for retry
func (dlq *DeadLetterQueue) GetReadyItems() []*DLQItem {
dlq.mu.RLock()
defer dlq.mu.RUnlock()
now := time.Now()
ready := make([]*DLQItem, 0)
for _, item := range dlq.items {
if item.RetryCount <= dlq.maxRetries && now.After(item.NextRetryTime) {
ready = append(ready, item)
}
}
return ready
}
// MarkSuccess removes an item from the DLQ after successful retry
func (dlq *DeadLetterQueue) MarkSuccess(eventID string) error {
dlq.mu.Lock()
defer dlq.mu.Unlock()
delete(dlq.items, eventID)
log.Printf("✅ Removed successfully retried item from DLQ: %s", eventID)
return dlq.saveToDisk()
}
// MarkFailure updates retry count for failed retry attempt
func (dlq *DeadLetterQueue) MarkFailure(eventID string, reason string) error {
dlq.mu.Lock()
defer dlq.mu.Unlock()
if item, exists := dlq.items[eventID]; exists {
item.RetryCount++
item.FailureReason = reason
item.LastFailed = time.Now()
item.NextRetryTime = dlq.calculateNextRetry(item.RetryCount)
if item.RetryCount > dlq.maxRetries {
log.Printf("💀 Item exceeded max retries, keeping in DLQ for manual review: %s", eventID)
}
}
return dlq.saveToDisk()
}
// GetStats returns DLQ statistics
func (dlq *DeadLetterQueue) GetStats() map[string]interface{} {
dlq.mu.RLock()
defer dlq.mu.RUnlock()
ready := 0
exhausted := 0
waiting := 0
now := time.Now()
for _, item := range dlq.items {
if item.RetryCount > dlq.maxRetries {
exhausted++
} else if now.After(item.NextRetryTime) {
ready++
} else {
waiting++
}
}
return map[string]interface{}{
"total_items": len(dlq.items),
"ready_for_retry": ready,
"waiting": waiting,
"exhausted": exhausted,
"max_retries": dlq.maxRetries,
}
}
// calculateNextRetry calculates the next retry time using exponential backoff with jitter
func (dlq *DeadLetterQueue) calculateNextRetry(retryCount int) time.Time {
// Exponential backoff: 2^retryCount minutes with jitter
baseDelay := time.Duration(math.Pow(2, float64(retryCount))) * time.Minute
// Add jitter (±25% random variation)
jitter := time.Duration((rand.Float64()*0.5 - 0.25) * float64(baseDelay)) // scale before the Duration conversion, or the fractional factor truncates to zero
delay := baseDelay + jitter
// Cap at 1 hour maximum
if delay > time.Hour {
delay = time.Hour
}
return time.Now().Add(delay)
}
// generateEventID creates a unique ID for an event
func (dlq *DeadLetterQueue) generateEventID(event SlurpEvent) string {
data := fmt.Sprintf("%s_%s_%s_%d",
event.EventType,
event.Path,
event.CreatedBy,
event.Timestamp.Unix())
hash := sha256.Sum256([]byte(data))
return fmt.Sprintf("dlq_%x", hash[:8])
}
// saveToDisk persists the DLQ to disk
func (dlq *DeadLetterQueue) saveToDisk() error {
filePath := filepath.Join(dlq.queueDir, "dlq_items.json")
data, err := json.MarshalIndent(dlq.items, "", " ")
if err != nil {
return fmt.Errorf("failed to marshal DLQ items: %w", err)
}
return os.WriteFile(filePath, data, 0644)
}
// loadFromDisk loads the DLQ from disk
func (dlq *DeadLetterQueue) loadFromDisk() error {
filePath := filepath.Join(dlq.queueDir, "dlq_items.json")
data, err := os.ReadFile(filePath)
if err != nil {
if os.IsNotExist(err) {
return nil // No existing queue file, start fresh
}
return fmt.Errorf("failed to read DLQ file: %w", err)
}
return json.Unmarshal(data, &dlq.items)
}
// BackoffStrategy calculates retry delays with exponential backoff and jitter
type BackoffStrategy struct {
initialDelay time.Duration
maxDelay time.Duration
multiplier float64
jitterFactor float64
}
// NewBackoffStrategy creates a new backoff strategy
func NewBackoffStrategy(initialDelay, maxDelay time.Duration, multiplier, jitterFactor float64) *BackoffStrategy {
return &BackoffStrategy{
initialDelay: initialDelay,
maxDelay: maxDelay,
multiplier: multiplier,
jitterFactor: jitterFactor,
}
}
// GetDelay calculates the delay for a given attempt number
func (bs *BackoffStrategy) GetDelay(attempt int) time.Duration {
if attempt <= 0 {
return bs.initialDelay
}
// Exponential backoff
delay := time.Duration(float64(bs.initialDelay) * math.Pow(bs.multiplier, float64(attempt-1)))
// Apply maximum delay cap
if delay > bs.maxDelay {
delay = bs.maxDelay
}
// Add jitter to avoid thundering herd
jitter := time.Duration((rand.Float64()*bs.jitterFactor*2 - bs.jitterFactor) * float64(delay)) // scale before the Duration conversion, or the fractional factor truncates to zero
delay += jitter
// Ensure delay is never negative
if delay < 0 {
delay = bs.initialDelay
}
return delay
}
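
For illustration, the circuit breaker and backoff strategy above compose into a simple retry loop like the sketch below (same package assumed; the attempt cap is an arbitrary choice):

// Sketch: guarding an arbitrary operation with the reliability primitives.
func callWithReliability(cb *CircuitBreaker, bs *BackoffStrategy, op func() error) error {
	for attempt := 1; attempt <= 5; attempt++ {
		if !cb.CanProceed() {
			return fmt.Errorf("circuit open, not attempting call")
		}
		if err := op(); err != nil {
			cb.RecordFailure()
			time.Sleep(bs.GetDelay(attempt)) // exponential backoff with jitter
			continue
		}
		cb.RecordSuccess()
		return nil
	}
	return fmt.Errorf("retries exhausted")
}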


@@ -0,0 +1,439 @@
package integration
import (
"context"
"encoding/json"
"fmt"
"log"
"sync"
"time"
"chorus.services/bzzz/pkg/config"
)
// ReliableSlurpClient wraps SlurpClient with reliability features
type ReliableSlurpClient struct {
baseClient *SlurpClient
circuitBreaker *CircuitBreaker
idempotencyMgr *IdempotencyManager
deadLetterQueue *DeadLetterQueue
backoffStrategy *BackoffStrategy
// Configuration
config config.SlurpConfig
// Background processing
ctx context.Context
cancel context.CancelFunc
retryWorker sync.WaitGroup
// Metrics
metrics *ReliabilityMetrics
metricsMutex sync.RWMutex
}
// ReliabilityMetrics tracks reliability-related metrics
type ReliabilityMetrics struct {
TotalEvents int64 `json:"total_events"`
SuccessfulEvents int64 `json:"successful_events"`
FailedEvents int64 `json:"failed_events"`
DeduplicatedEvents int64 `json:"deduplicated_events"`
CircuitBreakerTrips int64 `json:"circuit_breaker_trips"`
DLQEnqueued int64 `json:"dlq_enqueued"`
DLQRetrySuccesses int64 `json:"dlq_retry_successes"`
DLQRetryFailures int64 `json:"dlq_retry_failures"`
LastEventTime time.Time `json:"last_event_time"`
LastSuccessTime time.Time `json:"last_success_time"`
LastFailureTime time.Time `json:"last_failure_time"`
}
// NewReliableSlurpClient creates a new reliable SLURP client
func NewReliableSlurpClient(ctx context.Context, slurpConfig config.SlurpConfig) (*ReliableSlurpClient, error) {
if !slurpConfig.Enabled {
return nil, fmt.Errorf("SLURP integration is disabled")
}
// Create base client
baseClient := NewSlurpClient(slurpConfig)
// Test connection
if err := baseClient.ValidateConnection(ctx); err != nil {
return nil, fmt.Errorf("failed to validate SLURP connection: %w", err)
}
// Initialize reliability components
circuitBreaker := NewCircuitBreaker(
slurpConfig.Reliability.MaxFailures,
slurpConfig.Reliability.CooldownPeriod,
slurpConfig.Reliability.HalfOpenTimeout,
)
idempotencyMgr := NewIdempotencyManager(slurpConfig.Reliability.IdempotencyWindow)
dlq, err := NewDeadLetterQueue(
slurpConfig.Reliability.DLQDirectory,
slurpConfig.Reliability.MaxRetries,
)
if err != nil {
return nil, fmt.Errorf("failed to initialize dead letter queue: %w", err)
}
backoffStrategy := NewBackoffStrategy(
slurpConfig.Reliability.InitialBackoff,
slurpConfig.Reliability.MaxBackoff,
slurpConfig.Reliability.BackoffMultiplier,
slurpConfig.Reliability.JitterFactor,
)
clientCtx, cancel := context.WithCancel(ctx)
client := &ReliableSlurpClient{
baseClient: baseClient,
circuitBreaker: circuitBreaker,
idempotencyMgr: idempotencyMgr,
deadLetterQueue: dlq,
backoffStrategy: backoffStrategy,
config: slurpConfig,
ctx: clientCtx,
cancel: cancel,
metrics: &ReliabilityMetrics{},
}
// Start background retry worker
client.startRetryWorker()
log.Printf("🛡️ Reliable SLURP client initialized with circuit breaker and DLQ")
return client, nil
}
// CreateEventReliably sends an event with full reliability features
func (rc *ReliableSlurpClient) CreateEventReliably(ctx context.Context, event SlurpEvent) (*EventResponse, error) {
rc.metricsMutex.Lock()
rc.metrics.TotalEvents++
rc.metrics.LastEventTime = time.Now()
rc.metricsMutex.Unlock()
// Generate idempotency key
idempotencyKey := rc.idempotencyMgr.GenerateKey(
rc.extractDiscussionID(event),
event.EventType,
event.Timestamp,
)
// Check if already processed
if rc.idempotencyMgr.IsProcessed(idempotencyKey) {
rc.metricsMutex.Lock()
rc.metrics.DeduplicatedEvents++
rc.metricsMutex.Unlock()
log.Printf("🔄 Event deduplicated with key: %s", idempotencyKey)
return &EventResponse{
Success: true,
EventID: idempotencyKey,
Message: "Event deduplicated",
Timestamp: time.Now(),
}, nil
}
// Check circuit breaker
if !rc.circuitBreaker.CanProceed() {
// Circuit is open, add to DLQ for later retry
err := rc.deadLetterQueue.Enqueue(event, "Circuit breaker open")
if err != nil {
log.Printf("❌ Failed to enqueue event to DLQ: %v", err)
}
rc.metricsMutex.Lock()
rc.metrics.DLQEnqueued++
rc.metricsMutex.Unlock()
return nil, fmt.Errorf("circuit breaker is open, event queued for retry")
}
// Add idempotency header to event metadata
if event.Metadata == nil {
event.Metadata = make(map[string]interface{})
}
event.Metadata["idempotency_key"] = idempotencyKey
// Attempt to send event
resp, err := rc.baseClient.CreateEvent(ctx, event)
if err != nil {
// Record failure in circuit breaker
rc.circuitBreaker.RecordFailure()
// Add to DLQ for retry
if dlqErr := rc.deadLetterQueue.Enqueue(event, err.Error()); dlqErr != nil {
log.Printf("❌ Failed to enqueue failed event to DLQ: %v", dlqErr)
} else {
rc.metricsMutex.Lock()
rc.metrics.DLQEnqueued++
rc.metricsMutex.Unlock()
}
rc.metricsMutex.Lock()
rc.metrics.FailedEvents++
rc.metrics.LastFailureTime = time.Now()
rc.metricsMutex.Unlock()
return nil, fmt.Errorf("failed to send event: %w", err)
}
// Success! Record in circuit breaker and idempotency manager
rc.circuitBreaker.RecordSuccess()
rc.idempotencyMgr.MarkProcessed(idempotencyKey)
rc.metricsMutex.Lock()
rc.metrics.SuccessfulEvents++
rc.metrics.LastSuccessTime = time.Now()
rc.metricsMutex.Unlock()
return resp, nil
}
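// Failure semantics: when CreateEventReliably returns an error, the event has
// (where enqueueing succeeded) already been written to the dead letter queue,
// so callers should not retry manually; the background worker replays it.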
// CreateEventsBatchReliably sends a batch of events with reliability features
func (rc *ReliableSlurpClient) CreateEventsBatchReliably(ctx context.Context, events []SlurpEvent) (*BatchEventResponse, error) {
rc.metricsMutex.Lock()
rc.metrics.TotalEvents += int64(len(events))
rc.metrics.LastEventTime = time.Now()
rc.metricsMutex.Unlock()
// Check circuit breaker
if !rc.circuitBreaker.CanProceed() {
// Circuit is open, add all events to DLQ
for _, event := range events {
if err := rc.deadLetterQueue.Enqueue(event, "Circuit breaker open"); err != nil {
log.Printf("❌ Failed to enqueue batch event to DLQ: %v", err)
}
}
rc.metricsMutex.Lock()
rc.metrics.DLQEnqueued += int64(len(events))
rc.metricsMutex.Unlock()
return nil, fmt.Errorf("circuit breaker is open, %d events queued for retry", len(events))
}
// Add idempotency keys to all events
processedEvents := make([]SlurpEvent, 0, len(events))
deduplicatedCount := 0
for _, event := range events {
idempotencyKey := rc.idempotencyMgr.GenerateKey(
rc.extractDiscussionID(event),
event.EventType,
event.Timestamp,
)
// Check if already processed
if rc.idempotencyMgr.IsProcessed(idempotencyKey) {
deduplicatedCount++
continue
}
// Add idempotency key to metadata
if event.Metadata == nil {
event.Metadata = make(map[string]interface{})
}
event.Metadata["idempotency_key"] = idempotencyKey
processedEvents = append(processedEvents, event)
}
if deduplicatedCount > 0 {
rc.metricsMutex.Lock()
rc.metrics.DeduplicatedEvents += int64(deduplicatedCount)
rc.metricsMutex.Unlock()
log.Printf("🔄 Deduplicated %d events from batch", deduplicatedCount)
}
if len(processedEvents) == 0 {
return &BatchEventResponse{
Success: true,
ProcessedCount: 0,
FailedCount: 0,
Message: "All events were deduplicated",
Timestamp: time.Now(),
}, nil
}
// Attempt to send batch
resp, err := rc.baseClient.CreateEventsBatch(ctx, processedEvents)
if err != nil {
// Record failure in circuit breaker
rc.circuitBreaker.RecordFailure()
// Add all events to DLQ for retry
for _, event := range processedEvents {
if dlqErr := rc.deadLetterQueue.Enqueue(event, err.Error()); dlqErr != nil {
log.Printf("❌ Failed to enqueue batch event to DLQ: %v", dlqErr)
}
}
rc.metricsMutex.Lock()
rc.metrics.FailedEvents += int64(len(processedEvents))
rc.metrics.DLQEnqueued += int64(len(processedEvents))
rc.metrics.LastFailureTime = time.Now()
rc.metricsMutex.Unlock()
return nil, fmt.Errorf("failed to send batch: %w", err)
}
// Success! Record in circuit breaker and idempotency manager
rc.circuitBreaker.RecordSuccess()
// Mark all submitted events as processed; keys are marked even when
// resp.FailedCount > 0, so items that fail inside an accepted batch are
// not re-queued here
for _, event := range processedEvents {
if idempotencyKey, exists := event.Metadata["idempotency_key"].(string); exists {
rc.idempotencyMgr.MarkProcessed(idempotencyKey)
}
}
rc.metricsMutex.Lock()
rc.metrics.SuccessfulEvents += int64(resp.ProcessedCount)
rc.metrics.FailedEvents += int64(resp.FailedCount)
rc.metrics.LastSuccessTime = time.Now()
rc.metricsMutex.Unlock()
return resp, nil
}
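// Batch semantics: deduplicated events are dropped before sending, a fully
// deduplicated batch succeeds without touching the network, and a
// transport-level failure re-queues every remaining event in the batch.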
// GetHealth checks the health of SLURP service and reliability components
func (rc *ReliableSlurpClient) GetHealth(ctx context.Context) (*HealthResponse, error) {
// Try base health check first
health, err := rc.baseClient.GetHealth(ctx)
if err != nil {
rc.circuitBreaker.RecordFailure()
return nil, err
}
rc.circuitBreaker.RecordSuccess()
return health, nil
}
// GetReliabilityStats returns comprehensive reliability statistics
func (rc *ReliableSlurpClient) GetReliabilityStats() map[string]interface{} {
rc.metricsMutex.RLock()
metrics := *rc.metrics
rc.metricsMutex.RUnlock()
stats := map[string]interface{}{
"metrics": metrics,
"circuit_breaker": rc.circuitBreaker.GetStats(),
"dead_letter_queue": rc.deadLetterQueue.GetStats(),
}
return stats
}
// startRetryWorker starts background worker to process DLQ items
func (rc *ReliableSlurpClient) startRetryWorker() {
rc.retryWorker.Add(1)
go func() {
defer rc.retryWorker.Done()
ticker := time.NewTicker(rc.config.Reliability.RetryInterval)
defer ticker.Stop()
log.Printf("🔄 DLQ retry worker started (interval: %v)", rc.config.Reliability.RetryInterval)
for {
select {
case <-rc.ctx.Done():
log.Printf("🛑 DLQ retry worker stopping")
return
case <-ticker.C:
rc.processDLQItems()
}
}
}()
}
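// The retry worker lives for the lifetime of the client: it wakes every
// RetryInterval, replays whatever the DLQ reports as ready, and exits once
// Close cancels the client context.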
// processDLQItems processes items ready for retry from the DLQ
func (rc *ReliableSlurpClient) processDLQItems() {
readyItems := rc.deadLetterQueue.GetReadyItems()
if len(readyItems) == 0 {
return
}
log.Printf("🔄 Processing %d DLQ items ready for retry", len(readyItems))
for _, item := range readyItems {
if rc.ctx.Err() != nil {
break
}
// Check if circuit breaker allows retry
if !rc.circuitBreaker.CanProceed() {
log.Printf("⏸️ Circuit breaker open, skipping DLQ retry")
break
}
// Attempt retry
eventID := rc.deadLetterQueue.generateEventID(item.Event)
_, err := rc.baseClient.CreateEvent(rc.ctx, item.Event)
if err != nil {
// Retry failed
rc.circuitBreaker.RecordFailure()
if markErr := rc.deadLetterQueue.MarkFailure(eventID, err.Error()); markErr != nil {
log.Printf("❌ Failed to mark DLQ failure: %v", markErr)
}
rc.metricsMutex.Lock()
rc.metrics.DLQRetryFailures++
rc.metricsMutex.Unlock()
log.Printf("❌ DLQ retry failed for %s: %v", eventID, err)
} else {
// Retry succeeded
rc.circuitBreaker.RecordSuccess()
if markErr := rc.deadLetterQueue.MarkSuccess(eventID); markErr != nil {
log.Printf("❌ Failed to mark DLQ success: %v", markErr)
}
rc.metricsMutex.Lock()
rc.metrics.DLQRetrySuccesses++
rc.metricsMutex.Unlock()
log.Printf("✅ DLQ retry succeeded for %s", eventID)
}
}
}
// extractDiscussionID extracts discussion ID from event metadata for idempotency key generation
func (rc *ReliableSlurpClient) extractDiscussionID(event SlurpEvent) string {
if event.Metadata == nil {
return "unknown"
}
if discussionID, exists := event.Metadata["discussion_id"]; exists {
if id, ok := discussionID.(string); ok {
return id
}
}
// Fallback to event path if no discussion_id
return event.Path
}
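// Idempotency keys therefore derive from (discussion_id or path, event type,
// timestamp); two distinct events that share all three will be treated as
// duplicates and the second silently dropped.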
// Close gracefully shuts down the reliable client
func (rc *ReliableSlurpClient) Close() error {
log.Printf("🛑 Shutting down reliable SLURP client...")
// Cancel context to stop retry worker
rc.cancel()
// Wait for retry worker to finish
rc.retryWorker.Wait()
// Close base client
return rc.baseClient.Close()
}
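// Shutdown ordering matters: Close cancels the retry worker and waits for it
// before closing the base client, so no in-flight DLQ replay can race a
// closed underlying client.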