Integrate BACKBEAT SDK and resolve KACHING license validation

Major integrations and fixes:
- Added BACKBEAT SDK integration for P2P operation timing
- Implemented beat-aware status tracking for distributed operations
- Added Docker secrets support for secure license management
- Resolved KACHING license validation via HTTPS/TLS
- Updated docker-compose configuration for clean stack deployment
- Disabled rollback policies to prevent deployment failures
- Added license credential storage (CHORUS-DEV-MULTI-001)

Technical improvements:
- BACKBEAT P2P operation tracking with phase management
- Enhanced configuration system with file-based secrets
- Improved error handling for license validation
- Clean separation of KACHING and CHORUS deployment stacks

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
anthonyrawlins
2025-09-06 07:56:26 +10:00
parent 543ab216f9
commit 9bdcbe0447
4730 changed files with 1480093 additions and 1916 deletions

View File

@@ -0,0 +1,172 @@
package config
import (
"fmt"
"time"
"github.com/ipfs/boxo/ipns"
ds "github.com/ipfs/go-datastore"
dssync "github.com/ipfs/go-datastore/sync"
"github.com/libp2p/go-libp2p-kad-dht/providers"
"github.com/libp2p/go-libp2p-kbucket/peerdiversity"
record "github.com/libp2p/go-libp2p-record"
"github.com/libp2p/go-libp2p/core/host"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/libp2p/go-libp2p/core/protocol"
ma "github.com/multiformats/go-multiaddr"
)
// DefaultPrefix is the application specific prefix attached to all DHT protocols by default.
const DefaultPrefix protocol.ID = "/ipfs"
// defaultBucketSize is the bucket size that Defaults assigns to Config.BucketSize and
// that Validate requires whenever the protocol prefix is DefaultPrefix.
const defaultBucketSize = 20
// ModeOpt describes what mode the dht should operate in.
// NOTE(review): no named mode values appear in this file; they are presumably
// declared by the package that consumes Config — confirm.
type ModeOpt int
// QueryFilterFunc is a filter applied when considering peers to dial when querying.
// It receives the DHT as an untyped value together with the candidate's address info.
// NOTE(review): a true result presumably means "this peer may be dialed"
// (EmptyQueryFilter returns true for everything) — confirm against the caller.
type QueryFilterFunc func(dht interface{}, ai peer.AddrInfo) bool
// RouteTableFilterFunc is a filter applied when considering connections to keep in
// the local route table.
// It receives the DHT as an untyped value together with the candidate peer ID.
// NOTE(review): a true result presumably means "keep this peer" (EmptyRTFilter
// returns true for everything) — confirm against the caller.
type RouteTableFilterFunc func(dht interface{}, p peer.ID) bool
// Config is a structure containing all the options that can be used when constructing a DHT.
// Defaults populates the baseline values, Apply runs user supplied Options on top of
// them, and ApplyFallbacks fills in the settings that depend on the Host.
type Config struct {
// Datastore defaults to a mutex-wrapped in-memory map datastore (see Defaults).
Datastore ds.Batching
// Validator defaults to an empty record.NamespacedValidator; ApplyFallbacks adds the
// "pk" and "ipns" namespaces to it when ValidatorChanged is false.
Validator record.Validator
ValidatorChanged bool // if true implies that the validator has been changed and that Defaults should not be used
Mode ModeOpt
// ProtocolPrefix defaults to DefaultPrefix; Validate only enforces its checks for that prefix.
ProtocolPrefix protocol.ID
V1ProtocolOverride protocol.ID
// BucketSize defaults to defaultBucketSize.
BucketSize int
// Concurrency defaults to 10.
Concurrency int
// Resiliency defaults to 3.
Resiliency int
// MaxRecordAge defaults to providers.ProvideValidity.
MaxRecordAge time.Duration
// EnableProviders and EnableValues both default to true; Validate requires both
// to be true when the protocol prefix is DefaultPrefix.
EnableProviders bool
EnableValues bool
ProviderStore providers.ProviderStore
// QueryPeerFilter defaults to EmptyQueryFilter.
QueryPeerFilter QueryFilterFunc
// LookupCheckConcurrency defaults to 256.
LookupCheckConcurrency int
// RoutingTable groups the routing table related settings.
RoutingTable struct {
// RefreshQueryTimeout defaults to 10 seconds.
RefreshQueryTimeout time.Duration
// RefreshInterval defaults to 10 minutes.
RefreshInterval time.Duration
// AutoRefresh defaults to true.
AutoRefresh bool
// LatencyTolerance defaults to 10 seconds.
LatencyTolerance time.Duration
// CheckInterval is not set by Defaults.
CheckInterval time.Duration
// PeerFilter defaults to EmptyRTFilter.
PeerFilter RouteTableFilterFunc
DiversityFilter peerdiversity.PeerIPGroupFilter
}
BootstrapPeers func() []peer.AddrInfo
AddressFilter func([]ma.Multiaddr) []ma.Multiaddr
// test specific Config options
DisableFixLowPeers bool
TestAddressUpdateProcessing bool
EnableOptimisticProvide bool
// OptimisticProvideJobsPoolSize defaults to 60.
OptimisticProvideJobsPoolSize int
}
// EmptyQueryFilter is a QueryFilterFunc that performs no filtering: it returns
// true for every peer, regardless of the DHT or address info passed in.
func EmptyQueryFilter(_ interface{}, ai peer.AddrInfo) bool {
	return true
}
// EmptyRTFilter is a RouteTableFilterFunc that performs no filtering: it returns
// true for every peer ID, regardless of the DHT passed in.
func EmptyRTFilter(_ interface{}, p peer.ID) bool {
	return true
}
// Apply applies the given options to this Config, in the order they are passed.
//
// It stops at the first option that returns an error and reports that option's
// index. The underlying error is wrapped (not just formatted), so callers can
// still inspect it with errors.Is / errors.As. It returns nil when every option
// succeeds, including when no options are given.
func (c *Config) Apply(opts ...Option) error {
	for i, opt := range opts {
		if err := opt(c); err != nil {
			return fmt.Errorf("dht option %d failed: %w", i, err)
		}
	}
	return nil
}
// ApplyFallbacks sets default values that could not be applied during config creation since they are dependent
// on other configuration parameters (e.g. optA is by default 2x optB) and/or on the Host.
//
// When the validator has not been marked as changed, it must still be a
// record.NamespacedValidator; the "pk" and "ipns" namespaces are then filled in
// if they are missing, the latter using the host's peerstore as its key book.
// An error is returned if the validator was replaced without setting
// ValidatorChanged.
func (c *Config) ApplyFallbacks(h host.Host) error {
	// A user supplied validator is left untouched.
	if c.ValidatorChanged {
		return nil
	}

	namespaced, isNamespaced := c.Validator.(record.NamespacedValidator)
	if !isNamespaced {
		return fmt.Errorf("the default Validator was changed without being marked as changed")
	}

	if _, present := namespaced["pk"]; !present {
		namespaced["pk"] = record.PublicKeyValidator{}
	}
	if _, present := namespaced["ipns"]; !present {
		namespaced["ipns"] = ipns.Validator{KeyBook: h.Peerstore()}
	}
	return nil
}
// Option is the DHT option type: a function that mutates a Config and returns an
// error if it cannot be applied. Config.Apply runs a list of Options in order.
type Option func(*Config) error
// Defaults are the default DHT options. This option will be automatically
// prepended to any options you pass to the DHT constructor.
// It only assigns fields on the Config and always returns nil.
var Defaults = func(cfg *Config) error {
	// Protocol identity and sizing.
	cfg.ProtocolPrefix = DefaultPrefix
	cfg.BucketSize = defaultBucketSize
	cfg.Concurrency = 10
	cfg.Resiliency = 3
	cfg.LookupCheckConcurrency = 256

	// Record storage and validation.
	cfg.Validator = record.NamespacedValidator{}
	cfg.Datastore = dssync.MutexWrap(ds.NewMapDatastore())
	cfg.MaxRecordAge = providers.ProvideValidity
	cfg.EnableProviders = true
	cfg.EnableValues = true

	// Peer filtering: accept everything unless the caller overrides it.
	cfg.QueryPeerFilter = EmptyQueryFilter
	cfg.RoutingTable.PeerFilter = EmptyRTFilter

	// Routing table maintenance.
	cfg.RoutingTable.AutoRefresh = true
	cfg.RoutingTable.RefreshInterval = 10 * time.Minute
	cfg.RoutingTable.RefreshQueryTimeout = 10 * time.Second
	cfg.RoutingTable.LatencyTolerance = 10 * time.Second

	// MAGIC: It makes sense to set it to a multiple of OptProvReturnRatio * BucketSize. We chose a multiple of 4.
	cfg.OptimisticProvideJobsPoolSize = 60

	return nil
}
// Validate checks the constraints that apply to a Config using DefaultPrefix.
//
// A Config with any other protocol prefix is accepted without further checks.
// For DefaultPrefix the bucket size must be defaultBucketSize, providers and
// values must both be enabled, and the validator must be a
// record.NamespacedValidator holding exactly the "pk" namespace (a
// record.PublicKeyValidator) and the "ipns" namespace (an ipns.Validator).
// The first violated constraint is returned as an error.
func (c *Config) Validate() error {
	// Only the default prefix carries requirements.
	if c.ProtocolPrefix != DefaultPrefix {
		return nil
	}

	switch {
	case c.BucketSize != defaultBucketSize:
		return fmt.Errorf("protocol prefix %s must use bucket size %d", DefaultPrefix, defaultBucketSize)
	case !c.EnableProviders:
		return fmt.Errorf("protocol prefix %s must have providers enabled", DefaultPrefix)
	case !c.EnableValues:
		return fmt.Errorf("protocol prefix %s must have values enabled", DefaultPrefix)
	}

	namespaced, isNamespaced := c.Validator.(record.NamespacedValidator)
	if !isNamespaced {
		return fmt.Errorf("protocol prefix %s must use a namespaced Validator", DefaultPrefix)
	}
	if len(namespaced) != 2 {
		return fmt.Errorf("protocol prefix %s must have exactly two namespaced validators - /pk and /ipns", DefaultPrefix)
	}

	pkValidator, hasPK := namespaced["pk"]
	if !hasPK {
		return fmt.Errorf("protocol prefix %s must support the /pk namespaced Validator", DefaultPrefix)
	}
	if _, ok := pkValidator.(record.PublicKeyValidator); !ok {
		return fmt.Errorf("protocol prefix %s must use the record.PublicKeyValidator for the /pk namespace", DefaultPrefix)
	}

	ipnsValidator, hasIPNS := namespaced["ipns"]
	if !hasIPNS {
		return fmt.Errorf("protocol prefix %s must support the /ipns namespaced Validator", DefaultPrefix)
	}
	if _, ok := ipnsValidator.(ipns.Validator); !ok {
		return fmt.Errorf("protocol prefix %s must use ipns.Validator for the /ipns namespace", DefaultPrefix)
	}

	return nil
}

View File

@@ -0,0 +1,16 @@
package config
import "github.com/libp2p/go-libp2p/core/routing"
// QuorumOptionKey is the key type under which GetQuorum looks up the quorum
// value in routing.Options.Other.
type QuorumOptionKey struct{}
// defaultQuorum is the value GetQuorum returns when no int quorum option is present.
const defaultQuorum = 0
// GetQuorum returns the int stored in opts.Other under QuorumOptionKey{}.
// It defaults to 0 (defaultQuorum) if no option is found or the stored value is
// not an int.
func GetQuorum(opts *routing.Options) int {
	if quorum, isInt := opts.Other[QuorumOptionKey{}].(int); isInt {
		return quorum
	}
	return defaultQuorum
}

View File

@@ -0,0 +1,28 @@
package internal
import (
"context"
)
// CtxMutex is a mutex whose Lock can be abandoned through a context.
// It is a channel of capacity one: holding the lock means the single slot is
// occupied. Create it with NewCtxMutex.
type CtxMutex chan struct{}

// NewCtxMutex returns an unlocked CtxMutex.
func NewCtxMutex() CtxMutex {
	return make(chan struct{}, 1)
}

// Lock acquires the mutex, blocking until it is available or ctx is done.
// It returns nil once the lock is held, or ctx.Err() if the context finished
// first, in which case the lock is not held.
func (m CtxMutex) Lock(ctx context.Context) error {
	done := ctx.Done()
	select {
	case <-done:
		return ctx.Err()
	case m <- struct{}{}:
		return nil
	}
}

// Unlock releases the mutex. It panics with "not locked" if the mutex is not
// currently held.
func (m CtxMutex) Unlock() {
	select {
	case <-m:
		return
	default:
	}
	panic("not locked")
}

View File

@@ -0,0 +1,5 @@
package internal
import "errors"
// ErrIncorrectRecord is a sentinel error carrying the message "received incorrect record".
// NOTE(review): presumably returned when a peer answers with a record that does
// not match what was requested; its use sites are outside this file — confirm.
var ErrIncorrectRecord = errors.New("received incorrect record")

View File

@@ -0,0 +1,92 @@
package internal
import (
"fmt"
"strings"
"github.com/ipfs/go-cid"
"github.com/multiformats/go-multibase"
"github.com/multiformats/go-multihash"
)
// multibaseB32Encode returns k encoded as a multibase Base32 string.
// It panics if the encoder reports an error, which is not expected to happen.
func multibaseB32Encode(k []byte) string {
	encoded, err := multibase.Encode(multibase.Base32, k)
	if err == nil {
		return encoded
	}
	// Should be unreachable
	panic(err)
}
// tryFormatLoggableRecordKey renders a record key of the form "/<proto>/<rest>"
// as "/<proto>/<multibase-base32(rest)>" so that a possibly binary key can be
// logged safely.
//
// It returns an error when the key is empty, when it does not start with '/',
// or when there is no second '/' terminating the protocol segment. Except for
// the empty case, the error message carries the base32 encoding of the whole
// key so the offending input can still be identified.
func tryFormatLoggableRecordKey(k string) (string, error) {
	if len(k) == 0 {
		return "", fmt.Errorf("LoggableRecordKey is empty")
	}

	if k[0] != '/' {
		// Encode the key itself: there is no parsed remainder on this path, so
		// encoding anything else would always yield the encoding of "".
		return "", fmt.Errorf("LoggableRecordKey is not a path: %s", multibaseB32Encode([]byte(k)))
	}

	// it's a path (probably)
	protoEnd := strings.IndexByte(k[1:], '/')
	if protoEnd < 0 {
		return "", fmt.Errorf("LoggableRecordKey starts with '/' but is not a path: %s", multibaseB32Encode([]byte(k)))
	}

	// protoEnd is relative to k[1:], hence the +1 / +2 offsets into k.
	proto := k[1 : protoEnd+1]
	cstr := k[protoEnd+2:]

	encStr := multibaseB32Encode([]byte(cstr))
	return fmt.Sprintf("/%s/%s", proto, encStr), nil
}
// LoggableRecordKeyString is a record key held as a string whose String method
// produces a log-safe rendering.
type LoggableRecordKeyString string

// String returns the key formatted by tryFormatLoggableRecordKey, or the
// formatter's error text when the key cannot be formatted.
func (lk LoggableRecordKeyString) String() string {
	formatted, err := tryFormatLoggableRecordKey(string(lk))
	if err != nil {
		return err.Error()
	}
	return formatted
}
// LoggableRecordKeyBytes is a record key held as bytes whose String method
// produces a log-safe rendering.
type LoggableRecordKeyBytes []byte

// String returns the key formatted by tryFormatLoggableRecordKey, or the
// formatter's error text when the key cannot be formatted.
func (lk LoggableRecordKeyBytes) String() string {
	formatted, err := tryFormatLoggableRecordKey(string(lk))
	if err != nil {
		return err.Error()
	}
	return formatted
}
// LoggableProviderRecordBytes is a provider record key held as bytes whose
// String method produces a log-safe rendering.
type LoggableProviderRecordBytes []byte

// String returns the key formatted by tryFormatLoggableProviderKey, or the
// formatter's error text when the key cannot be formatted.
func (lk LoggableProviderRecordBytes) String() string {
	formatted, err := tryFormatLoggableProviderKey(lk)
	if err != nil {
		return err.Error()
	}
	return formatted
}
// tryFormatLoggableProviderKey returns the multibase Base32 encoding of k when
// k parses as a CID or, failing that, as a multihash. It returns an error when
// k is empty or parses as neither; the latter error includes the encoded key.
func tryFormatLoggableProviderKey(k []byte) (string, error) {
	if len(k) == 0 {
		return "", fmt.Errorf("LoggableProviderKey is empty")
	}

	encodedKey := multibaseB32Encode(k)

	// The DHT used to provide CIDs, but now provides multihashes
	// TODO: Drop this when enough of the network has upgraded
	if _, cidErr := cid.Cast(k); cidErr != nil {
		if _, mhErr := multihash.Cast(k); mhErr != nil {
			return "", fmt.Errorf("LoggableProviderKey is not a Multihash or CID: %s", encodedKey)
		}
	}

	return encodedKey, nil
}

View File

@@ -0,0 +1,387 @@
package net
import (
"bufio"
"context"
"fmt"
"io"
"sync"
"time"
"github.com/libp2p/go-libp2p/core/host"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/libp2p/go-libp2p/core/protocol"
logging "github.com/ipfs/go-log"
"github.com/libp2p/go-msgio"
//lint:ignore SA1019 TODO migrate away from gogo pb
"github.com/libp2p/go-msgio/protoio"
"go.opencensus.io/stats"
"go.opencensus.io/tag"
"github.com/libp2p/go-libp2p-kad-dht/internal"
"github.com/libp2p/go-libp2p-kad-dht/metrics"
pb "github.com/libp2p/go-libp2p-kad-dht/pb"
)
// dhtReadMessageTimeout bounds how long ctxReadMsg waits for a response before
// giving up with ErrReadTimeout.
var dhtReadMessageTimeout = 10 * time.Second
// ErrReadTimeout is an error that occurs when no message is read within the timeout period.
var ErrReadTimeout = fmt.Errorf("timed out reading response")
// logger is the package logger, created under the name "dht".
var logger = logging.Logger("dht")
// messageSenderImpl is responsible for sending requests and messages to peers efficiently, including reuse of streams.
// It also tracks metrics for sent requests and messages.
type messageSenderImpl struct {
host host.Host // the network services we need
// smlk guards strmap.
smlk sync.Mutex
// strmap holds one peerMessageSender per peer; entries are added by
// messageSenderForPeer and removed by OnDisconnect or when preparing the
// sender fails.
strmap map[peer.ID]*peerMessageSender
// protocols are the protocol IDs passed to host.NewStream when opening a stream.
protocols []protocol.ID
}
// NewMessageSenderImpl builds a message sender that opens streams on h using
// the given protocol IDs. It starts with no per-peer senders.
func NewMessageSenderImpl(h host.Host, protos []protocol.ID) pb.MessageSenderWithDisconnect {
	sender := new(messageSenderImpl)
	sender.host = h
	sender.protocols = protos
	sender.strmap = map[peer.ID]*peerMessageSender{}
	return sender
}
// OnDisconnect drops the cached peerMessageSender for p, if any, and invalidates
// it in the background. It does nothing when no sender is cached for p.
func (m *messageSenderImpl) OnDisconnect(ctx context.Context, p peer.ID) {
m.smlk.Lock()
defer m.smlk.Unlock()
ms, ok := m.strmap[p]
if !ok {
return
}
// Remove the entry while holding smlk so no new caller can pick up this sender.
delete(m.strmap, p)
// Do this asynchronously as ms.lk can block for a while.
go func() {
// If ctx is done before the per-sender lock is acquired, the sender is left
// as is (it is already out of the map).
if err := ms.lk.Lock(ctx); err != nil {
return
}
defer ms.lk.Unlock()
ms.invalidate()
}()
}
// SendRequest sends pmes to peer p as a request and returns the peer's response.
//
// The round trip is timed: on success the latency is recorded both as a metric
// and in the host's peerstore. Every attempt counts towards SentRequests;
// failures to obtain a sender or to complete the request additionally count
// towards SentRequestErrors, are logged at debug level, and return the error.
func (m *messageSenderImpl) SendRequest(ctx context.Context, p peer.ID, pmes *pb.Message) (*pb.Message, error) {
	ctx, _ = tag.New(ctx, metrics.UpsertMessageType(pmes))

	sender, err := m.messageSenderForPeer(ctx, p)
	if err != nil {
		stats.Record(ctx,
			metrics.SentRequests.M(1),
			metrics.SentRequestErrors.M(1),
		)
		logger.Debugw("request failed to open message sender", "error", err, "to", p)
		return nil, err
	}

	begin := time.Now()
	reply, err := sender.SendRequest(ctx, pmes)
	if err == nil {
		stats.Record(ctx,
			metrics.SentRequests.M(1),
			metrics.SentBytes.M(int64(pmes.Size())),
			metrics.OutboundRequestLatency.M(float64(time.Since(begin))/float64(time.Millisecond)),
		)
		m.host.Peerstore().RecordLatency(p, time.Since(begin))
		return reply, nil
	}

	stats.Record(ctx,
		metrics.SentRequests.M(1),
		metrics.SentRequestErrors.M(1),
	)
	logger.Debugw("request failed", "error", err, "to", p)
	return nil, err
}
// SendMessage sends pmes to peer p without waiting for a response.
//
// Every attempt counts towards SentMessages. Failures to obtain a sender or to
// send additionally count towards SentMessageErrors, are logged at debug level,
// and return the error. On success the message size is added to SentBytes.
func (m *messageSenderImpl) SendMessage(ctx context.Context, p peer.ID, pmes *pb.Message) error {
	ctx, _ = tag.New(ctx, metrics.UpsertMessageType(pmes))

	sender, err := m.messageSenderForPeer(ctx, p)
	if err != nil {
		stats.Record(ctx,
			metrics.SentMessages.M(1),
			metrics.SentMessageErrors.M(1),
		)
		logger.Debugw("message failed to open message sender", "error", err, "to", p)
		return err
	}

	sendErr := sender.SendMessage(ctx, pmes)
	if sendErr == nil {
		stats.Record(ctx,
			metrics.SentMessages.M(1),
			metrics.SentBytes.M(int64(pmes.Size())),
		)
		return nil
	}

	stats.Record(ctx,
		metrics.SentMessages.M(1),
		metrics.SentMessageErrors.M(1),
	)
	logger.Debugw("message failed", "error", sendErr, "to", p)
	return sendErr
}
// messageSenderForPeer returns the peerMessageSender for p, creating and
// preparing one if none is cached. It returns an error when a newly created
// sender cannot be prepared and no replacement has been registered meanwhile.
func (m *messageSenderImpl) messageSenderForPeer(ctx context.Context, p peer.ID) (*peerMessageSender, error) {
m.smlk.Lock()
ms, ok := m.strmap[p]
if ok {
// Fast path: reuse the cached sender.
m.smlk.Unlock()
return ms, nil
}
// Register the new sender before preparing it, then release smlk so that
// preparing (which opens a stream) does not happen under the map lock.
ms = &peerMessageSender{p: p, m: m, lk: internal.NewCtxMutex()}
m.strmap[p] = ms
m.smlk.Unlock()
if err := ms.prepOrInvalidate(ctx); err != nil {
m.smlk.Lock()
defer m.smlk.Unlock()
if msCur, ok := m.strmap[p]; ok {
// Changed. Use the new one, old one is invalid and
// not in the map so we can just throw it away.
if ms != msCur {
return msCur, nil
}
// Not changed, remove the now invalid stream from the
// map.
delete(m.strmap, p)
}
// Invalid but not in map. Must have been removed by a disconnect.
return nil, err
}
// All ready to go.
return ms, nil
}
// peerMessageSender is responsible for sending requests and messages to a particular peer
type peerMessageSender struct {
// s is the current stream to the peer; nil when no stream is open.
s network.Stream
// r is the varint-delimited reader created over s by prep.
r msgio.ReadCloser
// lk is taken by SendMessage, SendRequest, prepOrInvalidate and by the
// asynchronous invalidation started in OnDisconnect.
lk internal.CtxMutex
// p is the peer this sender talks to.
p peer.ID
// m is the owning messageSenderImpl, used for its host and protocol list.
m *messageSenderImpl
// invalid is set by invalidate; prep refuses to open a stream once it is set.
invalid bool
// singleMes counts sends that only succeeded after a retry; once it exceeds
// streamReuseTries the stream is closed after every message.
singleMes int
}
// invalidate is called before this peerMessageSender is removed from the strmap.
// It prevents the peerMessageSender from being reused/reinitialized and then
// forgotten (leaving the stream open).
// Any open stream is reset and dropped; the Reset error is deliberately ignored.
func (ms *peerMessageSender) invalidate() {
	ms.invalid = true
	if ms.s == nil {
		return
	}
	_ = ms.s.Reset()
	ms.s = nil
}
// prepOrInvalidate takes the sender's lock and prepares it for use. If
// preparing fails the sender is invalidated before the error is returned. It
// also returns an error, without touching the sender, when the lock cannot be
// acquired before ctx is done.
func (ms *peerMessageSender) prepOrInvalidate(ctx context.Context) error {
	if lockErr := ms.lk.Lock(ctx); lockErr != nil {
		return lockErr
	}
	defer ms.lk.Unlock()

	err := ms.prep(ctx)
	if err != nil {
		ms.invalidate()
	}
	return err
}
// prep makes sure the sender has an open stream. It fails if the sender has
// been invalidated, is a no-op if a stream is already open, and otherwise opens
// a new stream and wraps it in a varint-delimited reader.
func (ms *peerMessageSender) prep(ctx context.Context) error {
	switch {
	case ms.invalid:
		return fmt.Errorf("message sender has been invalidated")
	case ms.s != nil:
		return nil
	}

	// We only want to speak to peers using our primary protocols. We do not want to query any peer that only speaks
	// one of the secondary "server" protocols that we happen to support (e.g. older nodes that we can respond to for
	// backwards compatibility reasons).
	stream, err := ms.m.host.NewStream(ctx, ms.p, ms.m.protocols...)
	if err != nil {
		return err
	}

	ms.s = stream
	ms.r = msgio.NewVarintReaderSize(stream, network.MessageSizeMax)
	return nil
}
// streamReuseTries is the number of times we will try to reuse a stream to a
// given peer before giving up and reverting to the old one-message-per-stream
// behaviour.
// It is compared against peerMessageSender.singleMes, which is incremented each
// time a send only succeeds after a retry.
const streamReuseTries = 3
// SendMessage writes pmes to the peer's stream, holding the sender's lock for
// the duration. A failed write resets the stream and is retried once on a
// fresh stream; a second failure is returned to the caller.
func (ms *peerMessageSender) SendMessage(ctx context.Context, pmes *pb.Message) error {
if err := ms.lk.Lock(ctx); err != nil {
return err
}
defer ms.lk.Unlock()
retry := false
for {
// Opens a stream if none is open; fails if the sender was invalidated.
if err := ms.prep(ctx); err != nil {
return err
}
if err := ms.writeMsg(pmes); err != nil {
// Drop the broken stream so the next prep opens a new one.
_ = ms.s.Reset()
ms.s = nil
if retry {
logger.Debugw("error writing message", "error", err)
return err
}
logger.Debugw("error writing message", "error", err, "retrying", true)
retry = true
continue
}
var err error
if ms.singleMes > streamReuseTries {
// Stream reuse has failed too often for this peer: close after every message.
err = ms.s.Close()
ms.s = nil
} else if retry {
// Success only after a retry counts against stream reuse.
ms.singleMes++
}
return err
}
}
// SendRequest writes pmes to the peer's stream and reads one response message,
// holding the sender's lock for the duration. A failed write or read resets the
// stream and the whole exchange is retried once on a fresh stream; a second
// failure, or a read that ended with context.Canceled, is returned to the caller.
func (ms *peerMessageSender) SendRequest(ctx context.Context, pmes *pb.Message) (*pb.Message, error) {
if err := ms.lk.Lock(ctx); err != nil {
return nil, err
}
defer ms.lk.Unlock()
retry := false
for {
// Opens a stream if none is open; fails if the sender was invalidated.
if err := ms.prep(ctx); err != nil {
return nil, err
}
if err := ms.writeMsg(pmes); err != nil {
// Drop the broken stream so the next prep opens a new one.
_ = ms.s.Reset()
ms.s = nil
if retry {
logger.Debugw("error writing message", "error", err)
return nil, err
}
logger.Debugw("error writing message", "error", err, "retrying", true)
retry = true
continue
}
mes := new(pb.Message)
if err := ms.ctxReadMsg(ctx, mes); err != nil {
// The stream state is unknown after a failed read; reset it.
_ = ms.s.Reset()
ms.s = nil
if err == context.Canceled {
// retry would be same error
return nil, err
}
if retry {
logger.Debugw("error reading message", "error", err)
return nil, err
}
logger.Debugw("error reading message", "error", err, "retrying", true)
retry = true
continue
}
var err error
if ms.singleMes > streamReuseTries {
// Stream reuse has failed too often for this peer: close after every message.
err = ms.s.Close()
ms.s = nil
} else if retry {
// Success only after a retry counts against stream reuse.
ms.singleMes++
}
return mes, err
}
}
// writeMsg writes pmes to the sender's current stream via WriteMsg.
// The caller must have ensured that a stream is open (see prep).
func (ms *peerMessageSender) writeMsg(pmes *pb.Message) error {
return WriteMsg(ms.s, pmes)
}
// ctxReadMsg reads one message from the sender's reader and unmarshals it into
// mes. It returns the read or unmarshal error, ctx.Err() if ctx finishes first,
// or ErrReadTimeout if nothing arrives within dhtReadMessageTimeout.
func (ms *peerMessageSender) ctxReadMsg(ctx context.Context, mes *pb.Message) error {
// Buffered so the reading goroutine can always deliver its result and exit,
// even when this function has already returned on timeout or cancellation.
errc := make(chan error, 1)
// The reader is passed as an argument so the goroutine keeps using the reader
// that was current at call time even if ms.r is replaced later.
go func(r msgio.ReadCloser) {
defer close(errc)
bytes, err := r.ReadMsg()
defer r.ReleaseMsg(bytes)
if err != nil {
errc <- err
return
}
errc <- mes.Unmarshal(bytes)
}(ms.r)
// NOTE(review): the goroutine is not waited for on the timeout/cancel paths, so
// it may still write into mes after this function returns — callers discard mes
// on error; confirm that remains true.
t := time.NewTimer(dhtReadMessageTimeout)
defer t.Stop()
select {
case err := <-errc:
return err
case <-ctx.Done():
return ctx.Err()
case <-t.C:
return ErrReadTimeout
}
}
// The Protobuf writer performs multiple small writes when writing a message.
// We need to buffer those writes, to make sure that we're not sending a new
// packet for every single write.
//
// bufferedDelimitedWriter pairs a bufio.Writer with a delimited protobuf writer
// that writes into it (see writerPool for how the two are wired together).
type bufferedDelimitedWriter struct {
*bufio.Writer
protoio.WriteCloser
}
// writerPool recycles bufferedDelimitedWriter values between WriteMsg calls.
// New writers are created with no destination; WriteMsg points the bufio.Writer
// at the real destination with Reset before each use.
var writerPool = sync.Pool{
New: func() interface{} {
w := bufio.NewWriter(nil)
return &bufferedDelimitedWriter{
Writer: w,
// The delimited writer writes into the buffered writer, not the destination.
WriteCloser: protoio.NewDelimitedWriter(w),
}
},
}
// WriteMsg writes mes to w as a single delimited protobuf message, buffering
// the encoder's small writes and flushing them at the end. It returns the
// write error if encoding fails, otherwise the flush result. The pooled writer
// is detached from w and returned to the pool in either case.
func WriteMsg(w io.Writer, mes *pb.Message) error {
	pooled := writerPool.Get().(*bufferedDelimitedWriter)
	pooled.Reset(w)

	var err error
	if err = pooled.WriteMsg(mes); err == nil {
		err = pooled.Flush()
	}

	// Drop the reference to w before the writer goes back into the pool.
	pooled.Reset(nil)
	writerPool.Put(pooled)
	return err
}
// Flush flushes the embedded bufio.Writer to its underlying destination.
// NOTE(review): presumably declared explicitly so that Flush resolves
// unambiguously despite the two embedded types — confirm.
func (w *bufferedDelimitedWriter) Flush() error {
return w.Writer.Flush()
}

View File

@@ -0,0 +1,32 @@
package internal
import (
"context"
"fmt"
"unicode/utf8"
"github.com/multiformats/go-multibase"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
)
// StartSpan starts a span on the "go-libp2p-kad-dht" tracer. The span name is
// the given name prefixed with "KademliaDHT."; opts are passed through
// unchanged. It returns the derived context and the new span.
func StartSpan(ctx context.Context, name string, opts ...trace.SpanStartOption) (context.Context, trace.Span) {
	tracer := otel.Tracer("go-libp2p-kad-dht")
	spanName := fmt.Sprintf("KademliaDHT.%s", name)
	return tracer.Start(ctx, spanName, opts...)
}
// KeyAsAttribute formats a DHT key into a suitable tracing attribute.
// DHT keys can be either valid utf-8 or binary, when they are derived from, for example, a multihash.
// Tracing (and notably OpenTelemetry+grpc exporter) requires valid utf-8 for string attributes.
// Valid utf-8 keys are used verbatim; anything else is multibase Base58BTC encoded.
func KeyAsAttribute(name string, key string) attribute.KeyValue {
	raw := []byte(key)
	if !utf8.Valid(raw) {
		encoded, err := multibase.Encode(multibase.Base58BTC, raw)
		if err != nil {
			// should be unreachable
			panic(err)
		}
		return attribute.String(name, encoded)
	}
	return attribute.String(name, key)
}