Integrate BACKBEAT SDK and resolve KACHING license validation
Major integrations and fixes: - Added BACKBEAT SDK integration for P2P operation timing - Implemented beat-aware status tracking for distributed operations - Added Docker secrets support for secure license management - Resolved KACHING license validation via HTTPS/TLS - Updated docker-compose configuration for clean stack deployment - Disabled rollback policies to prevent deployment failures - Added license credential storage (CHORUS-DEV-MULTI-001) Technical improvements: - BACKBEAT P2P operation tracking with phase management - Enhanced configuration system with file-based secrets - Improved error handling for license validation - Clean separation of KACHING and CHORUS deployment stacks 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
426
vendor/github.com/chorus-services/backbeat/pkg/sdk/internal.go
generated
vendored
Normal file
426
vendor/github.com/chorus-services/backbeat/pkg/sdk/internal.go
generated
vendored
Normal file
@@ -0,0 +1,426 @@
|
||||
package sdk
|
||||
|
||||
import (
|
||||
"crypto/ed25519"
|
||||
"crypto/sha256"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/nats-io/nats.go"
|
||||
)
|
||||
|
||||
// connect establishes connection to NATS with retry logic
|
||||
func (c *client) connect() error {
|
||||
opts := []nats.Option{
|
||||
nats.ReconnectWait(c.config.ReconnectDelay),
|
||||
nats.MaxReconnects(c.config.MaxReconnects),
|
||||
nats.ReconnectHandler(func(nc *nats.Conn) {
|
||||
c.reconnectCount++
|
||||
c.metrics.RecordConnection()
|
||||
c.config.Logger.Info("NATS reconnected",
|
||||
"reconnect_count", c.reconnectCount,
|
||||
"url", nc.ConnectedUrl())
|
||||
}),
|
||||
nats.DisconnectErrHandler(func(nc *nats.Conn, err error) {
|
||||
if err != nil {
|
||||
c.metrics.RecordDisconnection()
|
||||
c.addError(fmt.Sprintf("NATS disconnected: %v", err))
|
||||
c.config.Logger.Warn("NATS disconnected", "error", err)
|
||||
}
|
||||
}),
|
||||
nats.ClosedHandler(func(nc *nats.Conn) {
|
||||
c.metrics.RecordDisconnection()
|
||||
c.config.Logger.Info("NATS connection closed")
|
||||
}),
|
||||
}
|
||||
|
||||
nc, err := nats.Connect(c.config.NATSUrl, opts...)
|
||||
if err != nil {
|
||||
c.metrics.RecordError(fmt.Sprintf("NATS connection failed: %v", err))
|
||||
return fmt.Errorf("failed to connect to NATS: %w", err)
|
||||
}
|
||||
|
||||
c.nc = nc
|
||||
c.metrics.RecordConnection()
|
||||
c.config.Logger.Info("Connected to NATS", "url", nc.ConnectedUrl())
|
||||
return nil
|
||||
}
|
||||
|
||||
// beatSubscriptionLoop handles beat frame subscription with jitter tolerance
|
||||
func (c *client) beatSubscriptionLoop() {
|
||||
defer c.wg.Done()
|
||||
|
||||
subject := fmt.Sprintf("backbeat.beat.%s", c.config.ClusterID)
|
||||
|
||||
// Subscribe to beat frames
|
||||
sub, err := c.nc.Subscribe(subject, c.handleBeatFrame)
|
||||
if err != nil {
|
||||
c.addError(fmt.Sprintf("failed to subscribe to beats: %v", err))
|
||||
c.config.Logger.Error("Failed to subscribe to beats", "error", err)
|
||||
return
|
||||
}
|
||||
defer sub.Unsubscribe()
|
||||
|
||||
c.config.Logger.Info("Beat subscription active", "subject", subject)
|
||||
|
||||
// Start local degradation timer for fallback timing
|
||||
localTicker := time.NewTicker(1 * time.Second) // Default 60 BPM fallback
|
||||
defer localTicker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-c.ctx.Done():
|
||||
return
|
||||
case <-localTicker.C:
|
||||
// Local degradation mode - generate synthetic beats if no recent beats
|
||||
c.beatMutex.RLock()
|
||||
timeSinceLastBeat := time.Since(c.lastBeatTime)
|
||||
c.beatMutex.RUnlock()
|
||||
|
||||
// If more than 2 beat intervals have passed, enter degradation mode
|
||||
if timeSinceLastBeat > 2*time.Second {
|
||||
if !c.localDegradation {
|
||||
c.localDegradation = true
|
||||
c.config.Logger.Warn("Entering local degradation mode",
|
||||
"time_since_last_beat", timeSinceLastBeat)
|
||||
}
|
||||
|
||||
c.handleLocalDegradationBeat()
|
||||
c.metrics.RecordLocalDegradation(timeSinceLastBeat)
|
||||
} else if c.localDegradation {
|
||||
// Exit degradation mode
|
||||
c.localDegradation = false
|
||||
c.config.Logger.Info("Exiting local degradation mode")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleBeatFrame processes incoming beat frames with jitter tolerance
|
||||
func (c *client) handleBeatFrame(msg *nats.Msg) {
|
||||
var beatFrame BeatFrame
|
||||
if err := json.Unmarshal(msg.Data, &beatFrame); err != nil {
|
||||
c.addError(fmt.Sprintf("failed to unmarshal beat frame: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
// Validate beat frame
|
||||
if beatFrame.Type != "backbeat.beatframe.v1" {
|
||||
c.addError(fmt.Sprintf("invalid beat frame type: %s", beatFrame.Type))
|
||||
return
|
||||
}
|
||||
|
||||
// Check for jitter tolerance
|
||||
now := time.Now()
|
||||
expectedTime := beatFrame.DeadlineAt.Add(-c.getBeatDuration()) // Beat should arrive one duration before deadline
|
||||
jitter := now.Sub(expectedTime)
|
||||
if jitter.Abs() > c.config.JitterTolerance {
|
||||
c.config.Logger.Debug("Beat jitter detected",
|
||||
"jitter", jitter,
|
||||
"tolerance", c.config.JitterTolerance,
|
||||
"beat_index", beatFrame.BeatIndex)
|
||||
}
|
||||
|
||||
// Update internal state
|
||||
c.beatMutex.Lock()
|
||||
c.currentBeat = beatFrame.BeatIndex
|
||||
c.currentWindow = beatFrame.WindowID
|
||||
c.currentHLC = beatFrame.HLC
|
||||
|
||||
// Track tempo changes and calculate actual BPM
|
||||
if c.currentTempo != beatFrame.TempoBPM {
|
||||
c.lastTempo = c.currentTempo
|
||||
c.currentTempo = beatFrame.TempoBPM
|
||||
}
|
||||
|
||||
// Calculate actual BPM from inter-beat timing
|
||||
actualBPM := 60.0 // Default
|
||||
if !c.lastBeatTime.IsZero() {
|
||||
interBeatDuration := now.Sub(c.lastBeatTime)
|
||||
if interBeatDuration > 0 {
|
||||
actualBPM = 60.0 / interBeatDuration.Seconds()
|
||||
}
|
||||
}
|
||||
|
||||
// Record tempo sample for drift analysis
|
||||
sample := tempoSample{
|
||||
BeatIndex: beatFrame.BeatIndex,
|
||||
Tempo: beatFrame.TempoBPM,
|
||||
MeasuredTime: now,
|
||||
ActualBPM: actualBPM,
|
||||
}
|
||||
|
||||
c.tempoHistory = append(c.tempoHistory, sample)
|
||||
// Keep only last 100 samples
|
||||
if len(c.tempoHistory) > 100 {
|
||||
c.tempoHistory = c.tempoHistory[1:]
|
||||
}
|
||||
|
||||
c.lastBeatTime = now
|
||||
c.beatMutex.Unlock()
|
||||
|
||||
// Record beat metrics
|
||||
c.metrics.RecordBeat(beatFrame.DeadlineAt.Add(-c.getBeatDuration()), now, beatFrame.Downbeat)
|
||||
|
||||
// If we were in local degradation mode, exit it
|
||||
if c.localDegradation {
|
||||
c.localDegradation = false
|
||||
c.config.Logger.Info("Exiting local degradation mode - beat received")
|
||||
}
|
||||
|
||||
// Execute beat callbacks with error handling
|
||||
c.callbackMutex.RLock()
|
||||
beatCallbacks := make([]func(BeatFrame), len(c.beatCallbacks))
|
||||
copy(beatCallbacks, c.beatCallbacks)
|
||||
|
||||
var downbeatCallbacks []func(BeatFrame)
|
||||
if beatFrame.Downbeat {
|
||||
downbeatCallbacks = make([]func(BeatFrame), len(c.downbeatCallbacks))
|
||||
copy(downbeatCallbacks, c.downbeatCallbacks)
|
||||
}
|
||||
c.callbackMutex.RUnlock()
|
||||
|
||||
// Execute callbacks in separate goroutines to prevent blocking
|
||||
for _, callback := range beatCallbacks {
|
||||
go c.safeExecuteCallback(callback, beatFrame, "beat")
|
||||
}
|
||||
|
||||
if beatFrame.Downbeat {
|
||||
for _, callback := range downbeatCallbacks {
|
||||
go c.safeExecuteCallback(callback, beatFrame, "downbeat")
|
||||
}
|
||||
}
|
||||
|
||||
c.config.Logger.Debug("Beat processed",
|
||||
"beat_index", beatFrame.BeatIndex,
|
||||
"downbeat", beatFrame.Downbeat,
|
||||
"phase", beatFrame.Phase,
|
||||
"window_id", beatFrame.WindowID)
|
||||
}
|
||||
|
||||
// handleLocalDegradationBeat generates synthetic beats during network issues
|
||||
func (c *client) handleLocalDegradationBeat() {
|
||||
c.beatMutex.Lock()
|
||||
c.currentBeat++
|
||||
|
||||
// Generate synthetic beat frame
|
||||
now := time.Now()
|
||||
beatFrame := BeatFrame{
|
||||
Type: "backbeat.beatframe.v1",
|
||||
ClusterID: c.config.ClusterID,
|
||||
BeatIndex: c.currentBeat,
|
||||
Downbeat: (c.currentBeat-1)%4 == 0, // Assume 4/4 time signature
|
||||
Phase: "degraded",
|
||||
HLC: fmt.Sprintf("%d-0", now.UnixNano()),
|
||||
DeadlineAt: now.Add(time.Second), // 1 second deadline in degradation
|
||||
TempoBPM: 2, // Default 2 BPM (30-second beats) - reasonable for distributed systems
|
||||
WindowID: c.generateDegradedWindowID(c.currentBeat),
|
||||
}
|
||||
|
||||
c.currentWindow = beatFrame.WindowID
|
||||
c.currentHLC = beatFrame.HLC
|
||||
c.lastBeatTime = now
|
||||
c.beatMutex.Unlock()
|
||||
|
||||
// Execute callbacks same as normal beats
|
||||
c.callbackMutex.RLock()
|
||||
beatCallbacks := make([]func(BeatFrame), len(c.beatCallbacks))
|
||||
copy(beatCallbacks, c.beatCallbacks)
|
||||
|
||||
var downbeatCallbacks []func(BeatFrame)
|
||||
if beatFrame.Downbeat {
|
||||
downbeatCallbacks = make([]func(BeatFrame), len(c.downbeatCallbacks))
|
||||
copy(downbeatCallbacks, c.downbeatCallbacks)
|
||||
}
|
||||
c.callbackMutex.RUnlock()
|
||||
|
||||
for _, callback := range beatCallbacks {
|
||||
go c.safeExecuteCallback(callback, beatFrame, "degraded-beat")
|
||||
}
|
||||
|
||||
if beatFrame.Downbeat {
|
||||
for _, callback := range downbeatCallbacks {
|
||||
go c.safeExecuteCallback(callback, beatFrame, "degraded-downbeat")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// safeExecuteCallback executes a callback with panic recovery
|
||||
func (c *client) safeExecuteCallback(callback func(BeatFrame), beat BeatFrame, callbackType string) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
errMsg := fmt.Sprintf("panic in %s callback: %v", callbackType, r)
|
||||
c.addError(errMsg)
|
||||
c.metrics.RecordError(errMsg)
|
||||
c.config.Logger.Error("Callback panic recovered",
|
||||
"type", callbackType,
|
||||
"panic", r,
|
||||
"beat_index", beat.BeatIndex)
|
||||
}
|
||||
}()
|
||||
|
||||
start := time.Now()
|
||||
callback(beat)
|
||||
duration := time.Since(start)
|
||||
|
||||
// Record callback latency metrics
|
||||
c.metrics.RecordCallbackLatency(duration, callbackType)
|
||||
|
||||
// Warn about slow callbacks
|
||||
if duration > 5*time.Millisecond {
|
||||
c.config.Logger.Warn("Slow callback detected",
|
||||
"type", callbackType,
|
||||
"duration", duration,
|
||||
"beat_index", beat.BeatIndex)
|
||||
}
|
||||
}
|
||||
|
||||
// validateStatusClaim validates a status claim
|
||||
func (c *client) validateStatusClaim(claim *StatusClaim) error {
|
||||
if claim.State == "" {
|
||||
return fmt.Errorf("state is required")
|
||||
}
|
||||
|
||||
validStates := map[string]bool{
|
||||
"executing": true,
|
||||
"planning": true,
|
||||
"waiting": true,
|
||||
"review": true,
|
||||
"done": true,
|
||||
"failed": true,
|
||||
}
|
||||
|
||||
if !validStates[claim.State] {
|
||||
return fmt.Errorf("invalid state: must be one of [executing, planning, waiting, review, done, failed], got '%s'", claim.State)
|
||||
}
|
||||
|
||||
if claim.Progress < 0.0 || claim.Progress > 1.0 {
|
||||
return fmt.Errorf("progress must be between 0.0 and 1.0, got %f", claim.Progress)
|
||||
}
|
||||
|
||||
if claim.BeatsLeft < 0 {
|
||||
return fmt.Errorf("beats_left must be non-negative, got %d", claim.BeatsLeft)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// signStatusClaim signs a status claim using Ed25519 (BACKBEAT-REQ-044)
|
||||
func (c *client) signStatusClaim(claim *StatusClaim) error {
|
||||
if c.config.SigningKey == nil {
|
||||
return fmt.Errorf("signing key not configured")
|
||||
}
|
||||
|
||||
// Create canonical representation for signing
|
||||
canonical, err := json.Marshal(claim)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal claim for signing: %w", err)
|
||||
}
|
||||
|
||||
// Sign the canonical representation
|
||||
signature := ed25519.Sign(c.config.SigningKey, canonical)
|
||||
|
||||
// Add signature to notes (temporary until proper signature field added)
|
||||
claim.Notes += fmt.Sprintf(" [sig:%x]", signature)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// createHeaders creates NATS headers with required security information
|
||||
func (c *client) createHeaders() nats.Header {
|
||||
headers := make(nats.Header)
|
||||
|
||||
// Add window ID header (BACKBEAT-REQ-044)
|
||||
headers.Add("x-window-id", c.GetCurrentWindow())
|
||||
|
||||
// Add HLC header (BACKBEAT-REQ-044)
|
||||
headers.Add("x-hlc", c.getCurrentHLC())
|
||||
|
||||
// Add agent ID for routing
|
||||
headers.Add("x-agent-id", c.config.AgentID)
|
||||
|
||||
return headers
|
||||
}
|
||||
|
||||
// getCurrentHLC returns the current HLC timestamp
|
||||
func (c *client) getCurrentHLC() string {
|
||||
c.beatMutex.RLock()
|
||||
defer c.beatMutex.RUnlock()
|
||||
|
||||
if c.currentHLC != "" {
|
||||
return c.currentHLC
|
||||
}
|
||||
|
||||
// Generate fallback HLC
|
||||
return fmt.Sprintf("%d-0", time.Now().UnixNano())
|
||||
}
|
||||
|
||||
// getBeatDuration calculates the duration of a beat based on current tempo
|
||||
func (c *client) getBeatDuration() time.Duration {
|
||||
c.beatMutex.RLock()
|
||||
tempo := c.currentTempo
|
||||
c.beatMutex.RUnlock()
|
||||
|
||||
if tempo <= 0 {
|
||||
tempo = 60 // Default to 60 BPM if no tempo information available
|
||||
}
|
||||
|
||||
// Calculate beat duration: 60 seconds / BPM = seconds per beat
|
||||
return time.Duration(60.0/float64(tempo)*1000) * time.Millisecond
|
||||
}
|
||||
|
||||
// generateDegradedWindowID generates a window ID for degraded mode
|
||||
func (c *client) generateDegradedWindowID(beatIndex int64) string {
|
||||
// Use similar algorithm to regular window ID but mark as degraded
|
||||
input := fmt.Sprintf("%s:degraded:%d", c.config.ClusterID, beatIndex/4) // Assume 4-beat bars
|
||||
hash := sha256.Sum256([]byte(input))
|
||||
return fmt.Sprintf("deg-%x", hash)[:32]
|
||||
}
|
||||
|
||||
// addError adds an error to the error list with deduplication
|
||||
func (c *client) addError(err string) {
|
||||
c.errorMutex.Lock()
|
||||
defer c.errorMutex.Unlock()
|
||||
|
||||
// Keep only the last 10 errors to prevent memory leaks
|
||||
if len(c.errors) >= 10 {
|
||||
c.errors = c.errors[1:]
|
||||
}
|
||||
|
||||
timestampedErr := fmt.Sprintf("[%s] %s", time.Now().Format("15:04:05"), err)
|
||||
c.errors = append(c.errors, timestampedErr)
|
||||
|
||||
// Record error in metrics
|
||||
c.metrics.RecordError(timestampedErr)
|
||||
}
|
||||
|
||||
// Legacy compatibility functions for BACKBEAT-REQ-043
|
||||
|
||||
// ConvertLegacyBeat converts legacy {bar,beat} to beat_index with warning
|
||||
func (c *client) ConvertLegacyBeat(bar, beat int) int64 {
|
||||
c.legacyMutex.Lock()
|
||||
if !c.legacyWarned {
|
||||
c.config.Logger.Warn("Legacy {bar,beat} format detected - please migrate to beat_index",
|
||||
"bar", bar, "beat", beat)
|
||||
c.legacyWarned = true
|
||||
}
|
||||
c.legacyMutex.Unlock()
|
||||
|
||||
// Convert assuming 4 beats per bar (standard)
|
||||
return int64((bar-1)*4 + beat)
|
||||
}
|
||||
|
||||
// GetLegacyBeatInfo converts current beat_index to legacy {bar,beat} format
|
||||
func (c *client) GetLegacyBeatInfo() LegacyBeatInfo {
|
||||
beatIndex := c.GetCurrentBeat()
|
||||
if beatIndex <= 0 {
|
||||
return LegacyBeatInfo{Bar: 1, Beat: 1}
|
||||
}
|
||||
|
||||
// Convert assuming 4 beats per bar
|
||||
bar := int((beatIndex-1)/4) + 1
|
||||
beat := int((beatIndex-1)%4) + 1
|
||||
|
||||
return LegacyBeatInfo{Bar: bar, Beat: beat}
|
||||
}
|
||||
Reference in New Issue
Block a user