Integrate BACKBEAT SDK and resolve KACHING license validation

Major integrations and fixes:
- Added BACKBEAT SDK integration for P2P operation timing
- Implemented beat-aware status tracking for distributed operations
- Added Docker secrets support for secure license management
- Resolved KACHING license validation via HTTPS/TLS
- Updated docker-compose configuration for clean stack deployment
- Disabled rollback policies to prevent deployment failures
- Added license credential storage (CHORUS-DEV-MULTI-001)

Technical improvements:
- BACKBEAT P2P operation tracking with phase management
- Enhanced configuration system with file-based secrets
- Improved error handling for license validation
- Clean separation of KACHING and CHORUS deployment stacks

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
anthonyrawlins
2025-09-06 07:56:26 +10:00
parent 543ab216f9
commit 9bdcbe0447
4730 changed files with 1480093 additions and 1916 deletions

View File

@@ -0,0 +1,232 @@
package backoff
import (
"math"
"math/rand"
"sync"
"time"
logging "github.com/ipfs/go-log/v2"
)
var log = logging.Logger("discovery-backoff")
type BackoffFactory func() BackoffStrategy
// BackoffStrategy describes how backoff will be implemented. BackoffStrategies are stateful.
type BackoffStrategy interface {
// Delay calculates how long the next backoff duration should be, given the prior calls to Delay
Delay() time.Duration
// Reset clears the internal state of the BackoffStrategy
Reset()
}
// Jitter implementations taken roughly from https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
// Jitter must return a duration between min and max. Min must be lower than, or equal to, max.
type Jitter func(duration, min, max time.Duration, rng *rand.Rand) time.Duration
// FullJitter returns a random number, uniformly chosen from the range [min, boundedDur].
// boundedDur is the duration bounded between min and max.
func FullJitter(duration, min, max time.Duration, rng *rand.Rand) time.Duration {
if duration <= min {
return min
}
normalizedDur := boundedDuration(duration, min, max) - min
return boundedDuration(time.Duration(rng.Int63n(int64(normalizedDur)))+min, min, max)
}
// NoJitter returns the duration bounded between min and max
func NoJitter(duration, min, max time.Duration, rng *rand.Rand) time.Duration {
return boundedDuration(duration, min, max)
}
type randomizedBackoff struct {
min time.Duration
max time.Duration
rng *rand.Rand
}
func (b *randomizedBackoff) BoundedDelay(duration time.Duration) time.Duration {
return boundedDuration(duration, b.min, b.max)
}
func boundedDuration(d, min, max time.Duration) time.Duration {
if d < min {
return min
}
if d > max {
return max
}
return d
}
type attemptBackoff struct {
attempt int
jitter Jitter
randomizedBackoff
}
func (b *attemptBackoff) Reset() {
b.attempt = 0
}
// NewFixedBackoff creates a BackoffFactory with a constant backoff duration
func NewFixedBackoff(delay time.Duration) BackoffFactory {
return func() BackoffStrategy {
return &fixedBackoff{delay: delay}
}
}
type fixedBackoff struct {
delay time.Duration
}
func (b *fixedBackoff) Delay() time.Duration {
return b.delay
}
func (b *fixedBackoff) Reset() {}
// NewPolynomialBackoff creates a BackoffFactory with backoff of the form c0*x^0, c1*x^1, ...cn*x^n where x is the attempt number
// jitter is the function for adding randomness around the backoff
// timeUnits are the units of time the polynomial is evaluated in
// polyCoefs is the array of polynomial coefficients from [c0, c1, ... cn]
func NewPolynomialBackoff(min, max time.Duration, jitter Jitter,
timeUnits time.Duration, polyCoefs []float64, rngSrc rand.Source) BackoffFactory {
rng := rand.New(&lockedSource{src: rngSrc})
return func() BackoffStrategy {
return &polynomialBackoff{
attemptBackoff: attemptBackoff{
randomizedBackoff: randomizedBackoff{
min: min,
max: max,
rng: rng,
},
jitter: jitter,
},
timeUnits: timeUnits,
poly: polyCoefs,
}
}
}
type polynomialBackoff struct {
attemptBackoff
timeUnits time.Duration
poly []float64
}
func (b *polynomialBackoff) Delay() time.Duration {
var polySum float64
switch len(b.poly) {
case 0:
return 0
case 1:
polySum = b.poly[0]
default:
polySum = b.poly[0]
exp := 1
attempt := b.attempt
b.attempt++
for _, c := range b.poly[1:] {
exp *= attempt
polySum += float64(exp) * c
}
}
return b.jitter(time.Duration(float64(b.timeUnits)*polySum), b.min, b.max, b.rng)
}
// NewExponentialBackoff creates a BackoffFactory with backoff of the form base^x + offset where x is the attempt number
// jitter is the function for adding randomness around the backoff
// timeUnits are the units of time the base^x is evaluated in
func NewExponentialBackoff(min, max time.Duration, jitter Jitter,
timeUnits time.Duration, base float64, offset time.Duration, rngSrc rand.Source) BackoffFactory {
rng := rand.New(&lockedSource{src: rngSrc})
return func() BackoffStrategy {
return &exponentialBackoff{
attemptBackoff: attemptBackoff{
randomizedBackoff: randomizedBackoff{
min: min,
max: max,
rng: rng,
},
jitter: jitter,
},
timeUnits: timeUnits,
base: base,
offset: offset,
}
}
}
type exponentialBackoff struct {
attemptBackoff
timeUnits time.Duration
base float64
offset time.Duration
}
func (b *exponentialBackoff) Delay() time.Duration {
attempt := b.attempt
b.attempt++
return b.jitter(
time.Duration(math.Pow(b.base, float64(attempt))*float64(b.timeUnits))+b.offset, b.min, b.max, b.rng)
}
// NewExponentialDecorrelatedJitter creates a BackoffFactory with backoff of the roughly of the form base^x where x is the attempt number.
// Delays start at the minimum duration and after each attempt delay = rand(min, delay * base), bounded by the max
// See https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/ for more information
func NewExponentialDecorrelatedJitter(min, max time.Duration, base float64, rngSrc rand.Source) BackoffFactory {
rng := rand.New(&lockedSource{src: rngSrc})
return func() BackoffStrategy {
return &exponentialDecorrelatedJitter{
randomizedBackoff: randomizedBackoff{
min: min,
max: max,
rng: rng,
},
base: base,
}
}
}
type exponentialDecorrelatedJitter struct {
randomizedBackoff
base float64
lastDelay time.Duration
}
func (b *exponentialDecorrelatedJitter) Delay() time.Duration {
if b.lastDelay < b.min {
b.lastDelay = b.min
return b.lastDelay
}
nextMax := int64(float64(b.lastDelay) * b.base)
b.lastDelay = boundedDuration(time.Duration(b.rng.Int63n(nextMax-int64(b.min)))+b.min, b.min, b.max)
return b.lastDelay
}
func (b *exponentialDecorrelatedJitter) Reset() { b.lastDelay = 0 }
type lockedSource struct {
lk sync.Mutex
src rand.Source
}
func (r *lockedSource) Int63() (n int64) {
r.lk.Lock()
n = r.src.Int63()
r.lk.Unlock()
return
}
func (r *lockedSource) Seed(seed int64) {
r.lk.Lock()
r.src.Seed(seed)
r.lk.Unlock()
}

View File

@@ -0,0 +1,337 @@
package backoff
import (
"context"
"fmt"
"sync"
"time"
"github.com/libp2p/go-libp2p/core/discovery"
"github.com/libp2p/go-libp2p/core/peer"
ma "github.com/multiformats/go-multiaddr"
)
// BackoffDiscovery is an implementation of discovery that caches peer data and attenuates repeated queries
type BackoffDiscovery struct {
disc discovery.Discovery
stratFactory BackoffFactory
peerCache map[string]*backoffCache
peerCacheMux sync.RWMutex
parallelBufSz int
returnedBufSz int
clock clock
}
type BackoffDiscoveryOption func(*BackoffDiscovery) error
func NewBackoffDiscovery(disc discovery.Discovery, stratFactory BackoffFactory, opts ...BackoffDiscoveryOption) (discovery.Discovery, error) {
b := &BackoffDiscovery{
disc: disc,
stratFactory: stratFactory,
peerCache: make(map[string]*backoffCache),
parallelBufSz: 32,
returnedBufSz: 32,
clock: realClock{},
}
for _, opt := range opts {
if err := opt(b); err != nil {
return nil, err
}
}
return b, nil
}
// WithBackoffDiscoverySimultaneousQueryBufferSize sets the buffer size for the channels between the main FindPeers query
// for a given namespace and all simultaneous FindPeers queries for the namespace
func WithBackoffDiscoverySimultaneousQueryBufferSize(size int) BackoffDiscoveryOption {
return func(b *BackoffDiscovery) error {
if size < 0 {
return fmt.Errorf("cannot set size to be smaller than 0")
}
b.parallelBufSz = size
return nil
}
}
// WithBackoffDiscoveryReturnedChannelSize sets the size of the buffer to be used during a FindPeer query.
// Note: This does not apply if the query occurs during the backoff time
func WithBackoffDiscoveryReturnedChannelSize(size int) BackoffDiscoveryOption {
return func(b *BackoffDiscovery) error {
if size < 0 {
return fmt.Errorf("cannot set size to be smaller than 0")
}
b.returnedBufSz = size
return nil
}
}
type clock interface {
Now() time.Time
}
type realClock struct{}
func (c realClock) Now() time.Time {
return time.Now()
}
// withClock lets you override the default time.Now() call. Useful for tests.
func withClock(c clock) BackoffDiscoveryOption {
return func(b *BackoffDiscovery) error {
b.clock = c
return nil
}
}
type backoffCache struct {
// strat is assigned on creation and not written to
strat BackoffStrategy
mux sync.Mutex // guards writes to all following fields
nextDiscover time.Time
prevPeers map[peer.ID]peer.AddrInfo
peers map[peer.ID]peer.AddrInfo
sendingChs map[chan peer.AddrInfo]int
ongoing bool
clock clock
}
func (d *BackoffDiscovery) Advertise(ctx context.Context, ns string, opts ...discovery.Option) (time.Duration, error) {
return d.disc.Advertise(ctx, ns, opts...)
}
func (d *BackoffDiscovery) FindPeers(ctx context.Context, ns string, opts ...discovery.Option) (<-chan peer.AddrInfo, error) {
// Get options
var options discovery.Options
err := options.Apply(opts...)
if err != nil {
return nil, err
}
// Get cached peers
d.peerCacheMux.RLock()
c, ok := d.peerCache[ns]
d.peerCacheMux.RUnlock()
/*
Overall plan:
If it's time to look for peers, look for peers, then return them
If it's not time then return cache
If it's time to look for peers, but we have already started looking. Get up to speed with ongoing request
*/
// Setup cache if we don't have one yet
if !ok {
pc := &backoffCache{
nextDiscover: time.Time{},
prevPeers: make(map[peer.ID]peer.AddrInfo),
peers: make(map[peer.ID]peer.AddrInfo),
sendingChs: make(map[chan peer.AddrInfo]int),
strat: d.stratFactory(),
clock: d.clock,
}
d.peerCacheMux.Lock()
c, ok = d.peerCache[ns]
if !ok {
d.peerCache[ns] = pc
c = pc
}
d.peerCacheMux.Unlock()
}
c.mux.Lock()
defer c.mux.Unlock()
timeExpired := d.clock.Now().After(c.nextDiscover)
// If it's not yet time to search again and no searches are in progress then return cached peers
if !(timeExpired || c.ongoing) {
chLen := options.Limit
if chLen == 0 {
chLen = len(c.prevPeers)
} else if chLen > len(c.prevPeers) {
chLen = len(c.prevPeers)
}
pch := make(chan peer.AddrInfo, chLen)
for _, ai := range c.prevPeers {
select {
case pch <- ai:
default:
// skip if we have asked for a lower limit than the number of peers known
}
}
close(pch)
return pch, nil
}
// If a request is not already in progress setup a dispatcher channel for dispatching incoming peers
if !c.ongoing {
pch, err := d.disc.FindPeers(ctx, ns, opts...)
if err != nil {
return nil, err
}
c.ongoing = true
go findPeerDispatcher(ctx, c, pch)
}
// Setup receiver channel for receiving peers from ongoing requests
evtCh := make(chan peer.AddrInfo, d.parallelBufSz)
pch := make(chan peer.AddrInfo, d.returnedBufSz)
rcvPeers := make([]peer.AddrInfo, 0, 32)
for _, ai := range c.peers {
rcvPeers = append(rcvPeers, ai)
}
c.sendingChs[evtCh] = options.Limit
go findPeerReceiver(ctx, pch, evtCh, rcvPeers)
return pch, nil
}
func findPeerDispatcher(ctx context.Context, c *backoffCache, pch <-chan peer.AddrInfo) {
defer func() {
c.mux.Lock()
// If the peer addresses have changed reset the backoff
if checkUpdates(c.prevPeers, c.peers) {
c.strat.Reset()
c.prevPeers = c.peers
}
c.nextDiscover = c.clock.Now().Add(c.strat.Delay())
c.ongoing = false
c.peers = make(map[peer.ID]peer.AddrInfo)
for ch := range c.sendingChs {
close(ch)
}
c.sendingChs = make(map[chan peer.AddrInfo]int)
c.mux.Unlock()
}()
for {
select {
case ai, ok := <-pch:
if !ok {
return
}
c.mux.Lock()
// If we receive the same peer multiple times return the address union
var sendAi peer.AddrInfo
if prevAi, ok := c.peers[ai.ID]; ok {
if combinedAi := mergeAddrInfos(prevAi, ai); combinedAi != nil {
sendAi = *combinedAi
} else {
c.mux.Unlock()
continue
}
} else {
sendAi = ai
}
c.peers[ai.ID] = sendAi
for ch, rem := range c.sendingChs {
if rem > 0 {
ch <- sendAi
c.sendingChs[ch] = rem - 1
}
}
c.mux.Unlock()
case <-ctx.Done():
return
}
}
}
func findPeerReceiver(ctx context.Context, pch, evtCh chan peer.AddrInfo, rcvPeers []peer.AddrInfo) {
defer close(pch)
for {
select {
case ai, ok := <-evtCh:
if ok {
rcvPeers = append(rcvPeers, ai)
sentAll := true
sendPeers:
for i, p := range rcvPeers {
select {
case pch <- p:
default:
rcvPeers = rcvPeers[i:]
sentAll = false
break sendPeers
}
}
if sentAll {
rcvPeers = []peer.AddrInfo{}
}
} else {
for _, p := range rcvPeers {
select {
case pch <- p:
case <-ctx.Done():
return
}
}
return
}
case <-ctx.Done():
return
}
}
}
func mergeAddrInfos(prevAi, newAi peer.AddrInfo) *peer.AddrInfo {
seen := make(map[string]struct{}, len(prevAi.Addrs))
combinedAddrs := make([]ma.Multiaddr, 0, len(prevAi.Addrs))
addAddrs := func(addrs []ma.Multiaddr) {
for _, addr := range addrs {
if _, ok := seen[addr.String()]; ok {
continue
}
seen[addr.String()] = struct{}{}
combinedAddrs = append(combinedAddrs, addr)
}
}
addAddrs(prevAi.Addrs)
addAddrs(newAi.Addrs)
if len(combinedAddrs) > len(prevAi.Addrs) {
combinedAi := &peer.AddrInfo{ID: prevAi.ID, Addrs: combinedAddrs}
return combinedAi
}
return nil
}
func checkUpdates(orig, update map[peer.ID]peer.AddrInfo) bool {
if len(orig) != len(update) {
return true
}
for p, ai := range update {
if prevAi, ok := orig[p]; ok {
if combinedAi := mergeAddrInfos(prevAi, ai); combinedAi != nil {
return true
}
} else {
return true
}
}
return false
}

View File

@@ -0,0 +1,94 @@
package backoff
import (
"context"
"sync"
"time"
"github.com/libp2p/go-libp2p/core/host"
"github.com/libp2p/go-libp2p/core/peer"
lru "github.com/hashicorp/golang-lru/v2"
)
// BackoffConnector is a utility to connect to peers, but only if we have not recently tried connecting to them already
type BackoffConnector struct {
cache *lru.TwoQueueCache[peer.ID, *connCacheData]
host host.Host
connTryDur time.Duration
backoff BackoffFactory
mux sync.Mutex
}
// NewBackoffConnector creates a utility to connect to peers, but only if we have not recently tried connecting to them already
// cacheSize is the size of a TwoQueueCache
// connectionTryDuration is how long we attempt to connect to a peer before giving up
// backoff describes the strategy used to decide how long to backoff after previously attempting to connect to a peer
func NewBackoffConnector(h host.Host, cacheSize int, connectionTryDuration time.Duration, backoff BackoffFactory) (*BackoffConnector, error) {
cache, err := lru.New2Q[peer.ID, *connCacheData](cacheSize)
if err != nil {
return nil, err
}
return &BackoffConnector{
cache: cache,
host: h,
connTryDur: connectionTryDuration,
backoff: backoff,
}, nil
}
type connCacheData struct {
nextTry time.Time
strat BackoffStrategy
}
// Connect attempts to connect to the peers passed in by peerCh. Will not connect to peers if they are within the backoff period.
// As Connect will attempt to dial peers as soon as it learns about them, the caller should try to keep the number,
// and rate, of inbound peers manageable.
func (c *BackoffConnector) Connect(ctx context.Context, peerCh <-chan peer.AddrInfo) {
for {
select {
case pi, ok := <-peerCh:
if !ok {
return
}
if pi.ID == c.host.ID() || pi.ID == "" {
continue
}
c.mux.Lock()
var cachedPeer *connCacheData
if tv, ok := c.cache.Get(pi.ID); ok {
now := time.Now()
if now.Before(tv.nextTry) {
c.mux.Unlock()
continue
}
tv.nextTry = now.Add(tv.strat.Delay())
} else {
cachedPeer = &connCacheData{strat: c.backoff()}
cachedPeer.nextTry = time.Now().Add(cachedPeer.strat.Delay())
c.cache.Add(pi.ID, cachedPeer)
}
c.mux.Unlock()
go func(pi peer.AddrInfo) {
ctx, cancel := context.WithTimeout(ctx, c.connTryDur)
defer cancel()
err := c.host.Connect(ctx, pi)
if err != nil {
log.Debugf("Error connecting to pubsub peer %s: %s", pi.ID, err.Error())
return
}
}(pi)
case <-ctx.Done():
log.Infof("discovery: backoff connector context error %v", ctx.Err())
return
}
}
}