Integrate BACKBEAT SDK and resolve KACHING license validation

Major integrations and fixes:
- Added BACKBEAT SDK integration for P2P operation timing
- Implemented beat-aware status tracking for distributed operations
- Added Docker secrets support for secure license management
- Resolved KACHING license validation via HTTPS/TLS
- Updated docker-compose configuration for clean stack deployment
- Disabled rollback policies to prevent deployment failures
- Added license credential storage (CHORUS-DEV-MULTI-001)

Technical improvements:
- BACKBEAT P2P operation tracking with phase management
- Enhanced configuration system with file-based secrets
- Improved error handling for license validation
- Clean separation of KACHING and CHORUS deployment stacks

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
anthonyrawlins
2025-09-06 07:56:26 +10:00
parent 543ab216f9
commit 9bdcbe0447
4730 changed files with 1480093 additions and 1916 deletions

View File

@@ -0,0 +1,65 @@
package upgrader
import (
"fmt"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/protocol"
"github.com/libp2p/go-libp2p/core/transport"
)
// transportConn is the connection value built by upgrader.upgrade. It embeds
// the multiplexed connection, the connection multiaddrs and the security
// state, and carries bookkeeping about how the connection was negotiated.
type transportConn struct {
network.MuxedConn
network.ConnMultiaddrs
network.ConnSecurity
// transport is returned by Transport and used by String.
transport transport.Transport
// scope is returned by Scope and released (Done) in Close.
scope network.ConnManagementScope
// stat is returned verbatim by Stat.
stat network.ConnStats
// muxer and security are the protocol IDs reported by ConnState.
muxer protocol.ID
security protocol.ID
// usedEarlyMuxerNegotiation is reported by ConnState.
usedEarlyMuxerNegotiation bool
}
// Compile-time check that *transportConn satisfies transport.CapableConn.
var _ transport.CapableConn = &transportConn{}
// Transport returns the transport stored on this connection.
func (t *transportConn) Transport() transport.Transport {
return t.transport
}
// String renders the connection as
// "<stream.Conn[transport] localAddr (localPeer) <-> remoteAddr (remotePeer)>".
// The bracketed transport part is only present when the transport implements
// fmt.Stringer.
func (t *transportConn) String() string {
	var tag string
	if named, isStringer := t.transport.(fmt.Stringer); isStringer {
		tag = "[" + named.String() + "]"
	}

	const layout = "<stream.Conn%s %s (%s) <-> %s (%s)>"
	return fmt.Sprintf(layout, tag, t.LocalMultiaddr(), t.LocalPeer(), t.RemoteMultiaddr(), t.RemotePeer())
}
// Stat returns the connection statistics stored on this connection.
func (t *transportConn) Stat() network.ConnStats {
return t.stat
}
// Scope returns the connection's resource scope, exposed through the
// network.ConnScope interface.
func (t *transportConn) Scope() network.ConnScope {
return t.scope
}
// Close closes the embedded multiplexed connection and returns its error.
func (t *transportConn) Close() error {
// Deferred so the resource scope is released after MuxedConn.Close has run,
// whatever it returns.
defer t.scope.Done()
return t.MuxedConn.Close()
}
// ConnState reports the negotiated stream multiplexer and security protocol
// IDs and whether early muxer negotiation was used. The Transport field is
// always reported as "tcp".
func (t *transportConn) ConnState() network.ConnectionState {
	var state network.ConnectionState
	state.StreamMultiplexer = t.muxer
	state.Security = t.security
	state.Transport = "tcp"
	state.UsedEarlyMuxerNegotiation = t.usedEarlyMuxerNegotiation
	return state
}

View File

@@ -0,0 +1,182 @@
package upgrader
import (
"context"
"fmt"
"strings"
"sync"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/transport"
logging "github.com/ipfs/go-log/v2"
tec "github.com/jbenet/go-temp-err-catcher"
manet "github.com/multiformats/go-multiaddr/net"
)
// log is the package logger, registered under the name "upgrader".
var log = logging.Logger("upgrader")
// listener wraps a manet.Listener: handleIncoming accepts raw connections,
// upgrades them, and queues them on incoming for Accept to hand out.
type listener struct {
manet.Listener
transport transport.Transport
upgrader *upgrader
rcmgr network.ResourceManager
// incoming carries upgraded connections from handleIncoming to Accept.
// handleIncoming closes it when the accept loop exits.
incoming chan transport.CapableConn
// err is written by handleIncoming before incoming is closed; Accept
// reports it once incoming has been drained.
err error
// Used for backpressure
threshold *threshold
// Canceling this context isn't sufficient to tear down the listener.
// Call close.
ctx context.Context
cancel func()
}
// Close shuts the listener down: it closes the underlying listener, cancels
// the listener context, and then closes every connection still queued on
// incoming until that channel is closed. The error returned is the one from
// closing the underlying listener.
func (l *listener) Close() error {
	// Close the underlying listener first so any relevant error is captured.
	closeErr := l.Listener.Close()

	l.cancel()

	// Drain the queue; the loop ends when incoming is closed.
	for pending := range l.incoming {
		pending.Close()
	}
	return closeErr
}
// handleIncoming is the accept loop: it handles inbound connections until the
// listener context is canceled or the underlying listener returns a
// non-temporary error.
//
// This function does a few interesting things that should be noted:
//
//  1. It logs and discards temporary/transient errors (errors with a Temporary()
//     function that returns true).
//  2. It stops accepting new connections once AcceptQueueLength connections have
//     been fully negotiated but not accepted. This gives us a basic backpressure
//     mechanism while still allowing us to negotiate connections in parallel.
//
// On exit it closes the underlying listener, makes sure l.err is set
// ("listener closed" if nothing else was recorded), waits for all in-flight
// upgrade goroutines, and finally closes l.incoming.
func (l *listener) handleIncoming() {
	var wg sync.WaitGroup
	defer func() {
		// make sure we're closed
		l.Listener.Close()
		if l.err == nil {
			l.err = fmt.Errorf("listener closed")
		}

		// l.err must be set before incoming is closed: Accept reads it as
		// soon as it observes the closed channel.
		wg.Wait()
		close(l.incoming)
	}()

	var catcher tec.TempErrCatcher
	for l.ctx.Err() == nil {
		maconn, err := l.Listener.Accept()
		if err != nil {
			// Note: function may pause the accept loop.
			if catcher.IsTemporary(err) {
				log.Infof("temporary accept error: %s", err)
				continue
			}
			l.err = err
			return
		}
		catcher.Reset()

		// gate the connection if applicable
		if l.upgrader.connGater != nil && !l.upgrader.connGater.InterceptAccept(maconn) {
			log.Debugf("gater blocked incoming connection on local addr %s from %s",
				maconn.LocalMultiaddr(), maconn.RemoteMultiaddr())
			if err := maconn.Close(); err != nil {
				log.Warnf("failed to close incoming connection rejected by gater: %s", err)
			}
			continue
		}

		connScope, err := l.rcmgr.OpenConnection(network.DirInbound, true, maconn.RemoteMultiaddr())
		if err != nil {
			log.Debugw("resource manager blocked accept of new connection", "error", err)
			if err := maconn.Close(); err != nil {
				log.Warnf("failed to close incoming connection rejected by resource manager: %s", err)
			}
			continue
		}

		// Backpressure: block while the number of upgraded-but-unaccepted
		// connections is at the threshold. Each goroutine below releases its
		// slot when it hands off or drops its connection, and the per-upgrade
		// context bounds how long that takes, so this wait needs no
		// cancellation of its own.
		l.threshold.Wait()

		log.Debugf("listener %s got connection: %s <---> %s",
			l,
			maconn.LocalMultiaddr(),
			maconn.RemoteMultiaddr())

		wg.Add(1)
		go func() {
			defer wg.Done()

			// One deadline covers both the upgrade and the hand-off to Accept.
			ctx, cancel := context.WithTimeout(l.ctx, l.upgrader.acceptTimeout)
			defer cancel()

			conn, err := l.upgrader.Upgrade(ctx, l.transport, maconn, network.DirInbound, "", connScope)
			if err != nil {
				// Don't bother bubbling this up. We just failed
				// to completely negotiate the connection.
				log.Debugf("accept upgrade error: %s (%s <--> %s)",
					err,
					maconn.LocalMultiaddr(),
					maconn.RemoteMultiaddr())
				// NOTE(review): Upgrade already calls connScope.Done() on
				// failure, so this second call relies on Done being safe to
				// call twice. Confirm before removing.
				connScope.Done()
				return
			}

			log.Debugf("listener %s accepted connection: %s", l, conn)

			// This records the fact that the connection has been
			// setup and is waiting to be accepted. This call
			// *never* blocks, even if we go over the threshold. It
			// simply ensures that calls to Wait block while we're
			// over the threshold.
			l.threshold.Acquire()
			defer l.threshold.Release()

			select {
			case l.incoming <- conn:
			case <-ctx.Done():
				if l.ctx.Err() == nil {
					// Listener *not* closed but the accept timeout expired.
					log.Warn("listener dropped connection due to slow accept")
				}
				// The hand-off is bounded by ctx. If nobody calls Accept, the
				// connection is closed here instead of being held forever.
				conn.Close()
			}
		}()
	}
}
// Accept returns the next upgraded connection that is still open, skipping
// queued connections that were closed in the meantime. Once incoming has been
// closed it returns the listener's recorded error; an error whose text
// contains "use of closed network connection" is reported as
// transport.ErrListenerClosed.
func (l *listener) Accept() (transport.CapableConn, error) {
	for {
		c, open := <-l.incoming
		if !open {
			break
		}
		// Could have been sitting in the queue for a while.
		if c.IsClosed() {
			continue
		}
		return c, nil
	}

	closedConn := strings.Contains(l.err.Error(), "use of closed network connection")
	if closedConn {
		return nil, transport.ErrListenerClosed
	}
	return nil, l.err
}
// String renders the listener as "<stream.Listener[transport] addr>", or as
// "<stream.Listener addr>" when the transport does not implement fmt.Stringer.
func (l *listener) String() string {
	named, isStringer := l.transport.(fmt.Stringer)
	if !isStringer {
		return fmt.Sprintf("<stream.Listener %s>", l.Multiaddr())
	}
	return fmt.Sprintf("<stream.Listener[%s] %s>", named, l.Multiaddr())
}
// Compile-time check that *listener satisfies transport.Listener.
var _ transport.Listener = (*listener)(nil)

View File

@@ -0,0 +1,50 @@
package upgrader
import (
"sync"
)
// newThreshold builds a threshold whose Wait blocks while the counter is at
// or above cutoff.
func newThreshold(cutoff int) *threshold {
	t := new(threshold)
	t.threshold = cutoff
	t.cond.L = &t.mu
	return t
}

// threshold is a counter paired with a condition variable: Acquire and
// Release move the counter, Wait blocks until it is below the cutoff.
type threshold struct {
	mu   sync.Mutex
	cond sync.Cond

	count     int
	threshold int
}

// Acquire increments the counter. It will not block.
func (t *threshold) Acquire() {
	t.mu.Lock()
	defer t.mu.Unlock()
	t.count++
}

// Release decrements the counter, waking all waiters when the counter is
// about to drop from exactly the cutoff to just below it. It panics with
// "negative count" if the counter is already zero.
func (t *threshold) Release() {
	t.mu.Lock()
	switch t.count {
	case 0:
		panic("negative count")
	case t.threshold:
		// The decrement below takes us under the cutoff; wake the waiters.
		t.cond.Broadcast()
	}
	t.count--
	t.mu.Unlock()
}

// Wait waits for the counter to drop below the threshold
func (t *threshold) Wait() {
	t.mu.Lock()
	defer t.mu.Unlock()
	for t.count >= t.threshold {
		t.cond.Wait()
	}
}

View File

@@ -0,0 +1,343 @@
package upgrader
import (
"context"
"errors"
"fmt"
"net"
"time"
"github.com/libp2p/go-libp2p/core/connmgr"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/peer"
ipnet "github.com/libp2p/go-libp2p/core/pnet"
"github.com/libp2p/go-libp2p/core/protocol"
"github.com/libp2p/go-libp2p/core/sec"
"github.com/libp2p/go-libp2p/core/transport"
"github.com/libp2p/go-libp2p/p2p/net/pnet"
manet "github.com/multiformats/go-multiaddr/net"
mss "github.com/multiformats/go-multistream"
)
// ErrNilPeer is returned when attempting to upgrade an outbound connection
// without specifying a peer ID.
var ErrNilPeer = errors.New("nil peer")
// AcceptQueueLength is the number of connections to fully set up before not
// accepting any new connections. It is read each time UpgradeListener creates
// a listener.
var AcceptQueueLength = 16
const (
// defaultAcceptTimeout is the accept timeout New installs before applying
// options.
defaultAcceptTimeout = 15 * time.Second
// defaultNegotiateTimeout is the deadline negotiateMuxer sets on the
// connection while a stream multiplexer is being selected.
defaultNegotiateTimeout = 60 * time.Second
)
// Option configures an upgrader. New applies the options in order and stops at
// the first one that returns an error.
type Option func(*upgrader) error
// WithAcceptTimeout returns an Option that sets the upgrader's accept timeout
// to t. The option itself never fails.
func WithAcceptTimeout(t time.Duration) Option {
	var apply Option = func(u *upgrader) error {
		u.acceptTimeout = t
		return nil
	}
	return apply
}
// StreamMuxer pairs a stream multiplexer implementation with the protocol ID
// under which the upgrader negotiates it.
type StreamMuxer struct {
// ID is the protocol ID registered for negotiation and matched by
// getMuxerByID.
ID protocol.ID
// Muxer creates the multiplexed connection (via NewConn) once selected.
Muxer network.Multiplexer
}
// upgrader is a multistream upgrader that can upgrade an underlying connection
// to a full transport connection (secure and multiplexed).
type upgrader struct {
// psk, when non-nil, makes upgrade wrap the raw connection in a private
// network protector.
psk ipnet.PSK
// connGater, when non-nil, is consulted on accept and after the security
// handshake.
connGater connmgr.ConnectionGater
// rcmgr is never nil after New: a NullResourceManager is substituted.
rcmgr network.ResourceManager
// muxerMuxer and muxerIDs are derived from muxers in New and used for
// server-side and client-side muxer negotiation respectively.
muxerMuxer *mss.MultistreamMuxer[protocol.ID]
muxers []StreamMuxer
muxerIDs []protocol.ID
// securityMuxer and securityIDs are derived from security in New and used
// for server-side and client-side security negotiation respectively.
security []sec.SecureTransport
securityMuxer *mss.MultistreamMuxer[protocol.ID]
securityIDs []protocol.ID
// acceptTimeout is the maximum duration an Accept is allowed to take.
// This includes the time between accepting the raw network connection,
// protocol selection as well as the handshake, if applicable.
//
// If unset, the default value (15s) is used.
acceptTimeout time.Duration
}
// Compile-time check that *upgrader satisfies transport.Upgrader.
var _ transport.Upgrader = &upgrader{}
// New builds an upgrader from the given security transports, stream muxers,
// optional pre-shared key, resource manager and connection gater, then applies
// opts in order.
//
// It returns the first error produced by an option. A nil resource manager
// (after options have run) is replaced with a network.NullResourceManager.
// Every muxer and security transport ID is registered with the corresponding
// multistream muxer and recorded, in input order, for client-side selection.
func New(security []sec.SecureTransport, muxers []StreamMuxer, psk ipnet.PSK, rcmgr network.ResourceManager, connGater connmgr.ConnectionGater, opts ...Option) (transport.Upgrader, error) {
	u := new(upgrader)
	u.acceptTimeout = defaultAcceptTimeout
	u.rcmgr = rcmgr
	u.connGater = connGater
	u.psk = psk
	u.muxerMuxer = mss.NewMultistreamMuxer[protocol.ID]()
	u.muxers = muxers
	u.security = security
	u.securityMuxer = mss.NewMultistreamMuxer[protocol.ID]()

	for i := range opts {
		if err := opts[i](u); err != nil {
			return nil, err
		}
	}

	if u.rcmgr == nil {
		u.rcmgr = &network.NullResourceManager{}
	}

	u.muxerIDs = make([]protocol.ID, 0, len(muxers))
	for i := range muxers {
		id := muxers[i].ID
		u.muxerMuxer.AddHandler(id, nil)
		u.muxerIDs = append(u.muxerIDs, id)
	}

	u.securityIDs = make([]protocol.ID, 0, len(security))
	for _, st := range security {
		u.securityMuxer.AddHandler(st.ID(), nil)
		u.securityIDs = append(u.securityIDs, st.ID())
	}

	return u, nil
}
// UpgradeListener wraps the passed multiaddr-net listener in a full
// libp2p-transport listener and starts its accept loop in a new goroutine.
// The listener's backpressure threshold is AcceptQueueLength as read at call
// time.
func (u *upgrader) UpgradeListener(t transport.Transport, list manet.Listener) transport.Listener {
	l := new(listener)
	l.Listener = list
	l.upgrader = u
	l.transport = t
	l.rcmgr = u.rcmgr
	l.threshold = newThreshold(AcceptQueueLength)
	l.incoming = make(chan transport.CapableConn)
	l.ctx, l.cancel = context.WithCancel(context.Background())

	go l.handleIncoming()
	return l
}
// Upgrade turns the multiaddr/net connection into a full libp2p-transport
// connection. If the upgrade fails, connScope is released (Done) before the
// error is returned.
func (u *upgrader) Upgrade(ctx context.Context, t transport.Transport, maconn manet.Conn, dir network.Direction, p peer.ID, connScope network.ConnManagementScope) (transport.CapableConn, error) {
	upgraded, err := u.upgrade(ctx, t, maconn, dir, p, connScope)
	if err == nil {
		return upgraded, nil
	}
	connScope.Done()
	return nil, err
}
// upgrade runs the upgrade pipeline on maconn: optional private-network
// protection, security negotiation and handshake, connection gating,
// attaching the peer to the resource scope, and stream-multiplexer setup.
//
// It returns ErrNilPeer for an outbound upgrade with an empty peer ID; that
// check happens before the connection is touched. On every later failure the
// underlying connection is closed before returning. connScope is not released
// here; Upgrade does that on error.
func (u *upgrader) upgrade(ctx context.Context, t transport.Transport, maconn manet.Conn, dir network.Direction, p peer.ID, connScope network.ConnManagementScope) (transport.CapableConn, error) {
	if dir == network.DirOutbound && p == "" {
		return nil, ErrNilPeer
	}
	var stat network.ConnStats
	if cs, ok := maconn.(network.ConnStat); ok {
		stat = cs.Stat()
	}

	var conn net.Conn = maconn
	if u.psk != nil {
		pconn, err := pnet.NewProtectedConn(u.psk, conn)
		if err != nil {
			conn.Close()
			return nil, fmt.Errorf("failed to setup private network protector: %w", err)
		}
		conn = pconn
	} else if ipnet.ForcePrivateNetwork {
		log.Error("tried to dial with no Private Network Protector but usage of Private Networks is forced by the environment")
		// Close the connection like every other failure path does; no caller
		// closes it for us, so returning without this leaks the connection.
		conn.Close()
		return nil, ipnet.ErrNotInPrivateNetwork
	}

	isServer := dir == network.DirInbound
	sconn, security, err := u.setupSecurity(ctx, conn, p, isServer)
	if err != nil {
		conn.Close()
		return nil, fmt.Errorf("failed to negotiate security protocol: %w", err)
	}

	// call the connection gater, if one is registered.
	if u.connGater != nil && !u.connGater.InterceptSecured(dir, sconn.RemotePeer(), maconn) {
		if err := maconn.Close(); err != nil {
			log.Errorw("failed to close connection", "peer", p, "addr", maconn.RemoteMultiaddr(), "error", err)
		}
		return nil, fmt.Errorf("gater rejected connection with peer %s and addr %s with direction %d",
			sconn.RemotePeer(), maconn.RemoteMultiaddr(), dir)
	}
	// Only call SetPeer if it hasn't already been set -- this can happen when we don't know
	// the peer in advance and in some bug scenarios.
	if connScope.PeerScope() == nil {
		if err := connScope.SetPeer(sconn.RemotePeer()); err != nil {
			log.Debugw("resource manager blocked connection for peer", "peer", sconn.RemotePeer(), "addr", conn.RemoteAddr(), "error", err)
			if err := maconn.Close(); err != nil {
				log.Errorw("failed to close connection", "peer", p, "addr", maconn.RemoteMultiaddr(), "error", err)
			}
			return nil, fmt.Errorf("resource manager connection with peer %s and addr %s with direction %d",
				sconn.RemotePeer(), maconn.RemoteMultiaddr(), dir)
		}
	}

	muxer, smconn, err := u.setupMuxer(ctx, sconn, isServer, connScope.PeerScope())
	if err != nil {
		sconn.Close()
		return nil, fmt.Errorf("failed to negotiate stream multiplexer: %w", err)
	}

	tc := &transportConn{
		MuxedConn:                 smconn,
		ConnMultiaddrs:            maconn,
		ConnSecurity:              sconn,
		transport:                 t,
		stat:                      stat,
		scope:                     connScope,
		muxer:                     muxer,
		security:                  security,
		usedEarlyMuxerNegotiation: sconn.ConnState().UsedEarlyMuxerNegotiation,
	}
	return tc, nil
}
// setupSecurity negotiates a security transport on conn and runs its
// handshake, inbound when isServer is true and outbound otherwise. It returns
// the secured connection, the ID of the security transport used, and the
// handshake error. If negotiation itself fails, the ID is empty.
func (u *upgrader) setupSecurity(ctx context.Context, conn net.Conn, p peer.ID, isServer bool) (sec.SecureConn, protocol.ID, error) {
	st, err := u.negotiateSecurity(ctx, conn, isServer)
	if err != nil {
		return nil, "", err
	}

	var secured sec.SecureConn
	if isServer {
		secured, err = st.SecureInbound(ctx, conn, p)
	} else {
		secured, err = st.SecureOutbound(ctx, conn, p)
	}
	return secured, st.ID(), err
}
// negotiateMuxer selects a stream multiplexer over nc using multistream: the
// server side answers the peer's proposal, the client side proposes its own
// IDs in order. The negotiation runs under a defaultNegotiateTimeout deadline
// on nc, which is cleared again on success. It fails if the selected protocol
// has no matching entry in u.muxers.
func (u *upgrader) negotiateMuxer(nc net.Conn, isServer bool) (*StreamMuxer, error) {
	deadline := time.Now().Add(defaultNegotiateTimeout)
	if err := nc.SetDeadline(deadline); err != nil {
		return nil, err
	}

	var (
		chosen protocol.ID
		err    error
	)
	if isServer {
		chosen, _, err = u.muxerMuxer.Negotiate(nc)
	} else {
		chosen, err = mss.SelectOneOf(u.muxerIDs, nc)
	}
	if err != nil {
		return nil, err
	}

	// Clear the deadline so it does not leak into later use of the connection.
	if err = nc.SetDeadline(time.Time{}); err != nil {
		return nil, err
	}

	m := u.getMuxerByID(chosen)
	if m == nil {
		return nil, fmt.Errorf("selected protocol we don't have a transport for")
	}
	return m, nil
}
// getMuxerByID returns a pointer to a copy of the first configured muxer
// whose ID equals id, or nil when none matches.
func (u *upgrader) getMuxerByID(id protocol.ID) *StreamMuxer {
	for i := range u.muxers {
		if u.muxers[i].ID != id {
			continue
		}
		// Hand out a copy so the result never aliases the slice element.
		match := u.muxers[i]
		return &match
	}
	return nil
}
// setupMuxer layers a stream multiplexer on top of the secured connection and
// returns the muxer's protocol ID with the multiplexed connection.
//
// If the security handshake already selected a muxer, that one is used
// directly. Otherwise the muxer is negotiated via multistream in a separate
// goroutine; if ctx ends first, conn is closed to interrupt the negotiation,
// the goroutine is awaited, and ctx.Err() is returned.
func (u *upgrader) setupMuxer(ctx context.Context, conn sec.SecureConn, server bool, scope network.PeerScope) (protocol.ID, network.MuxedConn, error) {
	// Use muxer selected from security handshake if available. Otherwise fall back to multistream-selection.
	if early := conn.ConnState().StreamMultiplexer; len(early) > 0 {
		m := u.getMuxerByID(early)
		if m == nil {
			return "", nil, fmt.Errorf("selected a muxer we don't know: %s", early)
		}
		muxed, err := m.Muxer.NewConn(conn, server, scope)
		if err != nil {
			return "", nil, err
		}
		return early, muxed, nil
	}

	type outcome struct {
		muxed network.MuxedConn
		id    protocol.ID
		err   error
	}

	// Buffered so the goroutine can always deliver its outcome and exit.
	results := make(chan outcome, 1)
	// TODO: The muxer should take a context.
	go func() {
		m, err := u.negotiateMuxer(conn, server)
		if err != nil {
			results <- outcome{err: err}
			return
		}
		muxed, err := m.Muxer.NewConn(conn, server, scope)
		results <- outcome{muxed: muxed, id: m.ID, err: err}
	}()

	select {
	case <-ctx.Done():
		// interrupt this process
		conn.Close()
		// wait to finish
		<-results
		return "", nil, ctx.Err()
	case out := <-results:
		return out.id, out.muxed, out.err
	}
}
// getSecurityByID returns the first configured security transport whose ID
// equals id, or nil when none matches.
func (u *upgrader) getSecurityByID(id protocol.ID) sec.SecureTransport {
	for i := range u.security {
		if candidate := u.security[i]; candidate.ID() == id {
			return candidate
		}
	}
	return nil
}
// negotiateSecurity picks a security transport over the still-insecure
// connection using multistream: the server side answers the peer's proposal,
// the client side proposes its own IDs in order. The negotiation runs in a
// separate goroutine; if ctx ends first, the connection is closed, the
// goroutine is awaited, and ctx.Err() is returned. It fails if the selected
// protocol has no matching entry in u.security.
func (u *upgrader) negotiateSecurity(ctx context.Context, insecure net.Conn, server bool) (sec.SecureTransport, error) {
	type outcome struct {
		id  protocol.ID
		err error
	}

	// Buffered so the goroutine can always deliver its outcome and exit.
	results := make(chan outcome, 1)
	go func() {
		var out outcome
		if server {
			out.id, _, out.err = u.securityMuxer.Negotiate(insecure)
		} else {
			out.id, out.err = mss.SelectOneOf(u.securityIDs, insecure)
		}
		results <- out
	}()

	select {
	case <-ctx.Done():
		// We *must* do this. We have outstanding work on the connection, and it's no longer safe to use.
		insecure.Close()
		<-results // wait to stop using the connection.
		return nil, ctx.Err()
	case out := <-results:
		if out.err != nil {
			return nil, out.err
		}
		st := u.getSecurityByID(out.id)
		if st == nil {
			return nil, fmt.Errorf("selected unknown security transport: %s", out.id)
		}
		return st, nil
	}
}