Integrate BACKBEAT SDK and resolve KACHING license validation
Major integrations and fixes: - Added BACKBEAT SDK integration for P2P operation timing - Implemented beat-aware status tracking for distributed operations - Added Docker secrets support for secure license management - Resolved KACHING license validation via HTTPS/TLS - Updated docker-compose configuration for clean stack deployment - Disabled rollback policies to prevent deployment failures - Added license credential storage (CHORUS-DEV-MULTI-001) Technical improvements: - BACKBEAT P2P operation tracking with phase management - Enhanced configuration system with file-based secrets - Improved error handling for license validation - Clean separation of KACHING and CHORUS deployment stacks 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
480
vendor/github.com/chorus-services/backbeat/pkg/sdk/client.go
generated
vendored
Normal file
480
vendor/github.com/chorus-services/backbeat/pkg/sdk/client.go
generated
vendored
Normal file
@@ -0,0 +1,480 @@
|
||||
// Package sdk provides the BACKBEAT Go SDK for enabling CHORUS services
|
||||
// to become BACKBEAT-aware with beat synchronization and status emission.
|
||||
package sdk
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/nats-io/nats.go"
|
||||
)
|
||||
|
||||
// Client interface defines the core BACKBEAT SDK functionality
// Implements BACKBEAT-REQ-040, 041, 042, 043, 044
//
// Values of this interface are obtained from NewClient; the unexported client
// type in this file is the implementation.
type Client interface {
	// Beat subscription (BACKBEAT-REQ-040)

	// OnBeat registers callback for beat events. A nil callback is rejected
	// with an error.
	OnBeat(callback func(BeatFrame)) error
	// OnDownbeat registers callback for downbeat events. A nil callback is
	// rejected with an error.
	OnDownbeat(callback func(BeatFrame)) error

	// Status emission (BACKBEAT-REQ-041)

	// EmitStatusClaim fills in the SDK-owned fields of claim, validates it,
	// signs it when a signing key is configured, and publishes it to NATS.
	EmitStatusClaim(claim StatusClaim) error

	// Beat budgets (BACKBEAT-REQ-042)

	// WithBeatBudget runs fn with a time limit of n beats at the current beat
	// duration. It returns fn's error, or an error of its own when n is not
	// positive or the limit elapses first.
	WithBeatBudget(n int, fn func() error) error

	// Utilities

	// GetCurrentBeat returns the current beat index.
	GetCurrentBeat() int64
	// GetCurrentWindow returns the current window ID.
	GetCurrentWindow() string
	// IsInWindow reports whether windowID equals the current window ID.
	IsInWindow(windowID string) bool
	// GetCurrentTempo returns the current tempo in BPM.
	GetCurrentTempo() int
	// GetTempoDrift returns the difference between the measured and the
	// expected beat duration.
	GetTempoDrift() time.Duration

	// Lifecycle management

	// Start connects to NATS and launches the beat subscription loop.
	Start(ctx context.Context) error
	// Stop cancels the client context and all active beat budgets, closes the
	// NATS connection and waits for background goroutines.
	Stop() error
	// Health returns a snapshot of connection, beat and tempo state.
	Health() HealthStatus
}
|
||||
|
||||
// Config represents the SDK configuration
//
// DefaultConfig returns a Config with the defaults quoted below already set.
// NewClient stores the pointer it is given (it does not copy the struct) and
// replaces a nil Logger with slog.Default(). When SigningKey is nil,
// EmitStatusClaim publishes claims without signing them.
type Config struct {
	ClusterID       string             // BACKBEAT cluster identifier
	AgentID         string             // Unique agent identifier
	NATSUrl         string             // NATS connection URL
	SigningKey      ed25519.PrivateKey // Ed25519 private key for signing (BACKBEAT-REQ-044)
	Logger          *slog.Logger       // Structured logger
	JitterTolerance time.Duration      // Maximum jitter tolerance (default: 50ms)
	ReconnectDelay  time.Duration      // NATS reconnection delay (default: 1s)
	MaxReconnects   int                // Maximum reconnection attempts (default: -1 for infinite)
}
|
||||
|
||||
// DefaultConfig returns a Config with sensible defaults
|
||||
func DefaultConfig() *Config {
|
||||
return &Config{
|
||||
JitterTolerance: 50 * time.Millisecond,
|
||||
ReconnectDelay: 1 * time.Second,
|
||||
MaxReconnects: -1, // Infinite reconnects
|
||||
Logger: slog.Default(),
|
||||
}
|
||||
}
|
||||
|
||||
// BeatFrame represents a beat frame with timing information
//
// It is the value handed to callbacks registered through OnBeat and
// OnDownbeat. The JSON tags define the field names used when a frame is
// encoded or decoded; field order and tags must not be changed casually.
// NOTE(review): the producer of these frames and the exact meaning of Phase,
// HLC and DeadlineAt are not visible in this part of the file — confirm
// against the BACKBEAT specification.
type BeatFrame struct {
	Type       string    `json:"type"`
	ClusterID  string    `json:"cluster_id"`
	BeatIndex  int64     `json:"beat_index"`
	Downbeat   bool      `json:"downbeat"`
	Phase      string    `json:"phase"`
	HLC        string    `json:"hlc"`
	DeadlineAt time.Time `json:"deadline_at"`
	TempoBPM   int       `json:"tempo_bpm"`
	WindowID   string    `json:"window_id"`
}
|
||||
|
||||
// StatusClaim represents a status claim emission
//
// EmitStatusClaim receives the claim by value and unconditionally overwrites
// Type, AgentID, BeatIndex and HLC, so anything the caller puts in those four
// fields is discarded. TaskID is only generated when the caller leaves it
// empty. The remaining fields are published as supplied, after validation.
type StatusClaim struct {
	// Auto-populated by SDK
	Type      string `json:"type"`       // Always "backbeat.statusclaim.v1"
	AgentID   string `json:"agent_id"`   // Auto-populated from config
	TaskID    string `json:"task_id"`    // Auto-generated if not provided
	BeatIndex int64  `json:"beat_index"` // Auto-populated from current beat
	HLC       string `json:"hlc"`        // Auto-populated from current HLC

	// User-provided
	State     string   `json:"state"`              // executing|planning|waiting|review|done|failed
	WaitFor   []string `json:"wait_for,omitempty"` // refs (e.g., hmmm://thread/...)
	BeatsLeft int      `json:"beats_left"`         // estimated beats remaining
	Progress  float64  `json:"progress"`           // progress ratio (0.0-1.0)
	Notes     string   `json:"notes"`              // status description
}
|
||||
|
||||
// HealthStatus represents the current health of the SDK client
//
// Values are produced by the client's Health method; the per-field notes
// below describe how that method fills them in.
type HealthStatus struct {
	Connected        bool          `json:"connected"`         // a NATS connection exists and reports IsConnected
	LastBeat         int64         `json:"last_beat"`         // current beat index
	LastBeatTime     time.Time     `json:"last_beat_time"`    // the client's recorded last-beat time
	TimeDrift        time.Duration `json:"time_drift"`        // time elapsed since LastBeatTime when Health was called
	ReconnectCount   int           `json:"reconnect_count"`   // copied from the client's reconnect counter
	LocalDegradation bool          `json:"local_degradation"` // copied from the client's local-degradation flag
	CurrentTempo     int           `json:"current_tempo"`     // current tempo in BPM
	TempoDrift       time.Duration `json:"tempo_drift"`       // result of GetTempoDrift
	MeasuredBPM      float64       `json:"measured_bpm"`      // mean ActualBPM of up to the 5 newest tempo samples; 60.0 when there are none
	Errors           []string      `json:"errors,omitempty"`  // copy of the client's recorded error strings
}
|
||||
|
||||
// LegacyBeatInfo represents legacy {bar,beat} information
// For BACKBEAT-REQ-043 compatibility
//
// NOTE(review): nothing in the visible part of this file constructs or
// consumes this type, so the bar/beat numbering convention (zero- or
// one-based, beats per bar) cannot be stated here — confirm at the use site.
type LegacyBeatInfo struct {
	Bar  int `json:"bar"`
	Beat int `json:"beat"`
}
|
||||
|
||||
// tempoSample represents a tempo measurement for drift calculation
//
// Samples are kept in client.tempoHistory. GetTempoDrift and Health only read
// ActualBPM, averaging it over the most recent samples.
// NOTE(review): the code that appends samples is outside this view; how
// ActualBPM is derived for the very first beat should be confirmed there.
type tempoSample struct {
	BeatIndex    int64
	Tempo        int
	MeasuredTime time.Time
	ActualBPM    float64 // Measured BPM based on inter-beat timing
}
|
||||
|
||||
// client implements the Client interface
//
// Instances are created by NewClient, which allocates the callback slices,
// the budget map, the error list and the tempo history, and seeds
// currentTempo with 60. The struct embeds mutexes and a WaitGroup, so it must
// be used through a pointer and never copied.
type client struct {
	config *Config            // caller-supplied configuration, stored by pointer
	nc     *nats.Conn         // NATS connection; nil-checked in Stop and Health
	ctx    context.Context    // created by Start; parent context for beat budgets
	cancel context.CancelFunc // cancels ctx; invoked by Stop when non-nil
	wg     sync.WaitGroup     // counts the beat subscription goroutine; Stop waits on it

	// Beat tracking
	// The public getters read these fields while holding beatMutex for reading.
	currentBeat   int64
	currentWindow string
	currentHLC    string
	lastBeatTime  time.Time
	currentTempo  int           // Current tempo in BPM
	lastTempo     int           // Last known tempo for drift calculation
	tempoHistory  []tempoSample // History for drift calculation
	beatMutex     sync.RWMutex

	// Callbacks
	// OnBeat / OnDownbeat append to these slices under callbackMutex.
	beatCallbacks     []func(BeatFrame)
	downbeatCallbacks []func(BeatFrame)
	callbackMutex     sync.RWMutex

	// Health and metrics
	// errors is copied out under errorMutex by Health.
	// NOTE(review): Health reads reconnectCount and localDegradation without
	// taking any lock; which mutex (if any) guards their writers is not
	// visible here — confirm.
	reconnectCount   int
	localDegradation bool
	errors           []string
	errorMutex       sync.RWMutex
	metrics          *Metrics

	// Beat budget tracking
	// Maps a per-call budget ID to its cancel function; guarded by budgetMutex.
	budgetContexts map[string]context.CancelFunc
	budgetMutex    sync.Mutex

	// Legacy compatibility
	// NOTE(review): presumably legacyMutex guards legacyWarned; the code using
	// them is outside this view.
	legacyWarned bool
	legacyMutex  sync.Mutex
}
|
||||
|
||||
// NewClient creates a new BACKBEAT SDK client
|
||||
func NewClient(config *Config) Client {
|
||||
if config.Logger == nil {
|
||||
config.Logger = slog.Default()
|
||||
}
|
||||
|
||||
c := &client{
|
||||
config: config,
|
||||
beatCallbacks: make([]func(BeatFrame), 0),
|
||||
downbeatCallbacks: make([]func(BeatFrame), 0),
|
||||
budgetContexts: make(map[string]context.CancelFunc),
|
||||
errors: make([]string, 0),
|
||||
tempoHistory: make([]tempoSample, 0, 100),
|
||||
currentTempo: 60, // Default to 60 BPM
|
||||
}
|
||||
|
||||
// Initialize metrics
|
||||
prefix := fmt.Sprintf("backbeat.sdk.%s", config.AgentID)
|
||||
c.metrics = NewMetrics(prefix)
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
// Start initializes the client and begins beat synchronization
|
||||
func (c *client) Start(ctx context.Context) error {
|
||||
c.ctx, c.cancel = context.WithCancel(ctx)
|
||||
|
||||
if err := c.connect(); err != nil {
|
||||
return fmt.Errorf("failed to connect to NATS: %w", err)
|
||||
}
|
||||
|
||||
c.wg.Add(1)
|
||||
go c.beatSubscriptionLoop()
|
||||
|
||||
c.config.Logger.Info("BACKBEAT SDK client started",
|
||||
slog.String("cluster_id", c.config.ClusterID),
|
||||
slog.String("agent_id", c.config.AgentID))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Stop gracefully stops the client
|
||||
func (c *client) Stop() error {
|
||||
if c.cancel != nil {
|
||||
c.cancel()
|
||||
}
|
||||
|
||||
// Cancel all active beat budgets
|
||||
c.budgetMutex.Lock()
|
||||
for id, cancel := range c.budgetContexts {
|
||||
cancel()
|
||||
delete(c.budgetContexts, id)
|
||||
}
|
||||
c.budgetMutex.Unlock()
|
||||
|
||||
if c.nc != nil {
|
||||
c.nc.Close()
|
||||
}
|
||||
|
||||
c.wg.Wait()
|
||||
|
||||
c.config.Logger.Info("BACKBEAT SDK client stopped")
|
||||
return nil
|
||||
}
|
||||
|
||||
// OnBeat registers a callback for beat events (BACKBEAT-REQ-040)
|
||||
func (c *client) OnBeat(callback func(BeatFrame)) error {
|
||||
if callback == nil {
|
||||
return fmt.Errorf("callback cannot be nil")
|
||||
}
|
||||
|
||||
c.callbackMutex.Lock()
|
||||
defer c.callbackMutex.Unlock()
|
||||
|
||||
c.beatCallbacks = append(c.beatCallbacks, callback)
|
||||
return nil
|
||||
}
|
||||
|
||||
// OnDownbeat registers a callback for downbeat events (BACKBEAT-REQ-040)
|
||||
func (c *client) OnDownbeat(callback func(BeatFrame)) error {
|
||||
if callback == nil {
|
||||
return fmt.Errorf("callback cannot be nil")
|
||||
}
|
||||
|
||||
c.callbackMutex.Lock()
|
||||
defer c.callbackMutex.Unlock()
|
||||
|
||||
c.downbeatCallbacks = append(c.downbeatCallbacks, callback)
|
||||
return nil
|
||||
}
|
||||
|
||||
// EmitStatusClaim emits a status claim (BACKBEAT-REQ-041)
|
||||
func (c *client) EmitStatusClaim(claim StatusClaim) error {
|
||||
// Auto-populate required fields
|
||||
claim.Type = "backbeat.statusclaim.v1"
|
||||
claim.AgentID = c.config.AgentID
|
||||
claim.BeatIndex = c.GetCurrentBeat()
|
||||
claim.HLC = c.getCurrentHLC()
|
||||
|
||||
// Auto-generate task ID if not provided
|
||||
if claim.TaskID == "" {
|
||||
claim.TaskID = fmt.Sprintf("task:%s", uuid.New().String()[:8])
|
||||
}
|
||||
|
||||
// Validate the claim
|
||||
if err := c.validateStatusClaim(&claim); err != nil {
|
||||
return fmt.Errorf("invalid status claim: %w", err)
|
||||
}
|
||||
|
||||
// Sign the claim if signing key is available (BACKBEAT-REQ-044)
|
||||
if c.config.SigningKey != nil {
|
||||
if err := c.signStatusClaim(&claim); err != nil {
|
||||
return fmt.Errorf("failed to sign status claim: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Publish to NATS
|
||||
data, err := json.Marshal(claim)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal status claim: %w", err)
|
||||
}
|
||||
|
||||
subject := fmt.Sprintf("backbeat.status.%s", c.config.ClusterID)
|
||||
headers := c.createHeaders()
|
||||
|
||||
msg := &nats.Msg{
|
||||
Subject: subject,
|
||||
Data: data,
|
||||
Header: headers,
|
||||
}
|
||||
|
||||
if err := c.nc.PublishMsg(msg); err != nil {
|
||||
c.addError(fmt.Sprintf("failed to publish status claim: %v", err))
|
||||
c.metrics.RecordStatusClaim(false)
|
||||
return fmt.Errorf("failed to publish status claim: %w", err)
|
||||
}
|
||||
|
||||
c.metrics.RecordStatusClaim(true)
|
||||
c.config.Logger.Debug("Status claim emitted",
|
||||
slog.String("agent_id", claim.AgentID),
|
||||
slog.String("task_id", claim.TaskID),
|
||||
slog.String("state", claim.State),
|
||||
slog.Int64("beat_index", claim.BeatIndex))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// WithBeatBudget executes a function with a beat-based timeout (BACKBEAT-REQ-042)
|
||||
func (c *client) WithBeatBudget(n int, fn func() error) error {
|
||||
if n <= 0 {
|
||||
return fmt.Errorf("beat budget must be positive, got %d", n)
|
||||
}
|
||||
|
||||
// Calculate timeout based on current tempo
|
||||
currentBeat := c.GetCurrentBeat()
|
||||
beatDuration := c.getBeatDuration()
|
||||
timeout := time.Duration(n) * beatDuration
|
||||
|
||||
// Use background context if client context is not set (for testing)
|
||||
baseCtx := c.ctx
|
||||
if baseCtx == nil {
|
||||
baseCtx = context.Background()
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(baseCtx, timeout)
|
||||
defer cancel()
|
||||
|
||||
// Track the budget context for cancellation
|
||||
budgetID := uuid.New().String()
|
||||
c.budgetMutex.Lock()
|
||||
c.budgetContexts[budgetID] = cancel
|
||||
c.budgetMutex.Unlock()
|
||||
|
||||
// Record budget creation
|
||||
c.metrics.RecordBudgetCreated()
|
||||
|
||||
defer func() {
|
||||
c.budgetMutex.Lock()
|
||||
delete(c.budgetContexts, budgetID)
|
||||
c.budgetMutex.Unlock()
|
||||
}()
|
||||
|
||||
// Execute function with timeout
|
||||
done := make(chan error, 1)
|
||||
go func() {
|
||||
done <- fn()
|
||||
}()
|
||||
|
||||
select {
|
||||
case err := <-done:
|
||||
c.metrics.RecordBudgetCompleted(false) // Not timed out
|
||||
if err != nil {
|
||||
c.config.Logger.Debug("Beat budget function completed with error",
|
||||
slog.Int("budget", n),
|
||||
slog.Int64("start_beat", currentBeat),
|
||||
slog.String("error", err.Error()))
|
||||
} else {
|
||||
c.config.Logger.Debug("Beat budget function completed successfully",
|
||||
slog.Int("budget", n),
|
||||
slog.Int64("start_beat", currentBeat))
|
||||
}
|
||||
return err
|
||||
case <-ctx.Done():
|
||||
c.metrics.RecordBudgetCompleted(true) // Timed out
|
||||
c.config.Logger.Warn("Beat budget exceeded",
|
||||
slog.Int("budget", n),
|
||||
slog.Int64("start_beat", currentBeat),
|
||||
slog.Duration("timeout", timeout))
|
||||
return fmt.Errorf("beat budget of %d beats exceeded", n)
|
||||
}
|
||||
}
|
||||
|
||||
// GetCurrentBeat returns the current beat index
|
||||
func (c *client) GetCurrentBeat() int64 {
|
||||
c.beatMutex.RLock()
|
||||
defer c.beatMutex.RUnlock()
|
||||
return c.currentBeat
|
||||
}
|
||||
|
||||
// GetCurrentWindow returns the current window ID
|
||||
func (c *client) GetCurrentWindow() string {
|
||||
c.beatMutex.RLock()
|
||||
defer c.beatMutex.RUnlock()
|
||||
return c.currentWindow
|
||||
}
|
||||
|
||||
// IsInWindow checks if we're currently in the specified window
|
||||
func (c *client) IsInWindow(windowID string) bool {
|
||||
return c.GetCurrentWindow() == windowID
|
||||
}
|
||||
|
||||
// GetCurrentTempo returns the current tempo in BPM
|
||||
func (c *client) GetCurrentTempo() int {
|
||||
c.beatMutex.RLock()
|
||||
defer c.beatMutex.RUnlock()
|
||||
return c.currentTempo
|
||||
}
|
||||
|
||||
// GetTempoDrift calculates the drift between expected and actual tempo
|
||||
func (c *client) GetTempoDrift() time.Duration {
|
||||
c.beatMutex.RLock()
|
||||
defer c.beatMutex.RUnlock()
|
||||
|
||||
if len(c.tempoHistory) < 2 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Calculate average measured BPM from recent samples
|
||||
historyLen := len(c.tempoHistory)
|
||||
recentCount := 10
|
||||
if historyLen < recentCount {
|
||||
recentCount = historyLen
|
||||
}
|
||||
|
||||
recent := c.tempoHistory[historyLen-recentCount:]
|
||||
if len(recent) < 2 {
|
||||
recent = c.tempoHistory
|
||||
}
|
||||
|
||||
totalBPM := 0.0
|
||||
for _, sample := range recent {
|
||||
totalBPM += sample.ActualBPM
|
||||
}
|
||||
avgMeasuredBPM := totalBPM / float64(len(recent))
|
||||
|
||||
// Calculate drift
|
||||
expectedBeatDuration := 60.0 / float64(c.currentTempo)
|
||||
actualBeatDuration := 60.0 / avgMeasuredBPM
|
||||
|
||||
drift := actualBeatDuration - expectedBeatDuration
|
||||
return time.Duration(drift * float64(time.Second))
|
||||
}
|
||||
|
||||
// Health returns the current health status
|
||||
func (c *client) Health() HealthStatus {
|
||||
c.errorMutex.RLock()
|
||||
errors := make([]string, len(c.errors))
|
||||
copy(errors, c.errors)
|
||||
c.errorMutex.RUnlock()
|
||||
|
||||
c.beatMutex.RLock()
|
||||
timeDrift := time.Since(c.lastBeatTime)
|
||||
currentTempo := c.currentTempo
|
||||
|
||||
// Calculate measured BPM from recent tempo history
|
||||
measuredBPM := 60.0 // Default
|
||||
if len(c.tempoHistory) > 0 {
|
||||
historyLen := len(c.tempoHistory)
|
||||
recentCount := 5
|
||||
if historyLen < recentCount {
|
||||
recentCount = historyLen
|
||||
}
|
||||
|
||||
recent := c.tempoHistory[historyLen-recentCount:]
|
||||
totalBPM := 0.0
|
||||
for _, sample := range recent {
|
||||
totalBPM += sample.ActualBPM
|
||||
}
|
||||
measuredBPM = totalBPM / float64(len(recent))
|
||||
}
|
||||
c.beatMutex.RUnlock()
|
||||
|
||||
tempoDrift := c.GetTempoDrift()
|
||||
|
||||
return HealthStatus{
|
||||
Connected: c.nc != nil && c.nc.IsConnected(),
|
||||
LastBeat: c.GetCurrentBeat(),
|
||||
LastBeatTime: c.lastBeatTime,
|
||||
TimeDrift: timeDrift,
|
||||
ReconnectCount: c.reconnectCount,
|
||||
LocalDegradation: c.localDegradation,
|
||||
CurrentTempo: currentTempo,
|
||||
TempoDrift: tempoDrift,
|
||||
MeasuredBPM: measuredBPM,
|
||||
Errors: errors,
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user