Integrate BACKBEAT SDK and resolve KACHING license validation

Major integrations and fixes:
- Added BACKBEAT SDK integration for P2P operation timing
- Implemented beat-aware status tracking for distributed operations
- Added Docker secrets support for secure license management
- Resolved KACHING license validation via HTTPS/TLS
- Updated docker-compose configuration for clean stack deployment
- Disabled rollback policies to prevent deployment failures
- Added license credential storage (CHORUS-DEV-MULTI-001)

Technical improvements:
- BACKBEAT P2P operation tracking with phase management
- Enhanced configuration system with file-based secrets
- Improved error handling for license validation
- Clean separation of KACHING and CHORUS deployment stacks

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
anthonyrawlins
2025-09-06 07:56:26 +10:00
parent 543ab216f9
commit 9bdcbe0447
4730 changed files with 1480093 additions and 1916 deletions

View File

@@ -0,0 +1,27 @@
package holepunch
import (
"github.com/libp2p/go-libp2p/core/peer"
ma "github.com/multiformats/go-multiaddr"
)
// WithAddrFilter returns a Service option that installs f as the multiaddress
// filter of the hole punching service.
//
// The filter is applied in two places: to the observed addresses we send to
// the remote peer (e.g. to announce only TCP or only QUIC multiaddresses
// instead of both), and to the multiaddresses the remote peer announced to us
// before we consider them.
// Theoretically, a filter may also add multiaddresses in both cases.
func WithAddrFilter(f AddrFilter) Option {
	var opt Option = func(svc *Service) error {
		svc.filter = f
		return nil
	}
	return opt
}
// AddrFilter defines the interface for multiaddress filtering.
// Both methods receive the ID of the remote peer together with a list of
// multiaddresses and return the list that should be used instead.
type AddrFilter interface {
	// FilterLocal filters the multiaddresses that are sent to the remote peer.
	FilterLocal(remoteID peer.ID, maddrs []ma.Multiaddr) []ma.Multiaddr
	// FilterRemote filters the multiaddresses received from the remote peer.
	FilterRemote(remoteID peer.ID, maddrs []ma.Multiaddr) []ma.Multiaddr
}

View File

@@ -0,0 +1,289 @@
package holepunch
import (
"context"
"errors"
"fmt"
"sync"
"time"
"github.com/libp2p/go-libp2p/core/host"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/libp2p/go-libp2p/p2p/protocol/holepunch/pb"
"github.com/libp2p/go-libp2p/p2p/protocol/identify"
"github.com/libp2p/go-msgio/pbio"
ma "github.com/multiformats/go-multiaddr"
manet "github.com/multiformats/go-multiaddr/net"
)
//go:generate protoc --proto_path=$PWD:$PWD/../../.. --go_out=. --go_opt=Mpb/holepunch.proto=./pb pb/holepunch.proto
// ErrHolePunchActive is returned from DirectConnect when another hole punching
// attempt to the same peer is currently running.
var ErrHolePunchActive = errors.New("another hole punching attempt to this peer is active")

const (
	// dialTimeout bounds the forced direct dial that is tried before hole punching.
	dialTimeout = 5 * time.Second
	// maxRetries is the maximum number of hole punch rounds per DirectConnect call.
	maxRetries = 3
)
// The holePuncher is run on the peer that's behind a NAT / Firewall.
// It observes new incoming connections via a relay that it has a reservation with,
// and initiates the DCUtR protocol with them.
// It then first tries to establish a direct connection, and if that fails, it
// initiates a hole punch.
type holePuncher struct {
	// ctx is cancelled by Close (via ctxCancel); dials, streams and the
	// goroutines started from Connected are all bound to it.
	ctx context.Context
	ctxCancel context.CancelFunc
	host host.Host
	// refCount counts the goroutines started from Connected so that Close can
	// wait for them to finish.
	refCount sync.WaitGroup
	ids identify.IDService
	// active hole punches for deduplicating
	activeMx sync.Mutex
	active map[peer.ID]struct{}
	// closed is set by Close while holding closeMx; once it is set,
	// beginDirectConnect refuses new attempts with ErrClosed.
	closeMx sync.RWMutex
	closed bool
	tracer *tracer
	// filter, when non-nil, is applied to the addresses exchanged with the
	// remote peer during hole punch coordination.
	filter AddrFilter
}
// newHolePuncher builds a holePuncher for host h with its own cancellable
// context and registers it as a notifiee on the host's network so that it is
// told about new connections.
func newHolePuncher(h host.Host, ids identify.IDService, tracer *tracer, filter AddrFilter) *holePuncher {
	ctx, cancel := context.WithCancel(context.Background())
	puncher := &holePuncher{
		ctx:       ctx,
		ctxCancel: cancel,
		host:      h,
		ids:       ids,
		active:    map[peer.ID]struct{}{},
		tracer:    tracer,
		filter:    filter,
	}
	h.Network().Notify((*netNotifiee)(puncher))
	return puncher
}
// beginDirectConnect registers p as having a hole punch in progress.
// It returns ErrClosed if the hole puncher has already been closed and
// ErrHolePunchActive if an attempt for p is already registered.
func (hp *holePuncher) beginDirectConnect(p peer.ID) error {
	// Hold the read lock for the whole call so Close cannot flip `closed`
	// between the check and the registration.
	hp.closeMx.RLock()
	defer hp.closeMx.RUnlock()
	if hp.closed {
		return ErrClosed
	}

	hp.activeMx.Lock()
	defer hp.activeMx.Unlock()

	_, inProgress := hp.active[p]
	if inProgress {
		return ErrHolePunchActive
	}
	hp.active[p] = struct{}{}
	return nil
}
// DirectConnect attempts to make a direct connection with a remote peer.
// It first attempts a direct dial (if we have a public address of that peer), and then
// coordinates a hole punch over the given relay connection.
// Only one attempt per peer runs at a time; a concurrent call for the same
// peer fails with ErrHolePunchActive.
func (hp *holePuncher) DirectConnect(p peer.ID) error {
	err := hp.beginDirectConnect(p)
	if err != nil {
		return err
	}

	// Unregister the attempt when we are done, whatever the outcome.
	unregister := func() {
		hp.activeMx.Lock()
		defer hp.activeMx.Unlock()
		delete(hp.active, p)
	}
	defer unregister()

	return hp.directConnect(p)
}
// directConnect does the actual work of DirectConnect for the remote peer rp.
//
// It returns nil right away if a direct connection already exists. Otherwise,
// if the peerstore holds a public, non-relay address for rp, it tries one
// forced direct dial bounded by dialTimeout. If that does not yield a
// connection, it runs up to maxRetries hole punch rounds: each round
// coordinates with the peer via initiateHolePunch, waits half of the measured
// RTT, and then dials the addresses the peer reported.
//
// It returns nil on success, the coordination error if initiateHolePunch
// fails, the context error if the hole puncher is closed while waiting, and a
// generic error once all retries are used up.
func (hp *holePuncher) directConnect(rp peer.ID) error {
	// short-circuit check to see if we already have a direct connection
	if getDirectConnection(hp.host, rp) != nil {
		return nil
	}
	// short-circuit hole punching if a direct dial works.
	// attempt a direct connection ONLY if we have a public address for the remote peer
	for _, a := range hp.host.Peerstore().Addrs(rp) {
		if manet.IsPublicAddr(a) && !isRelayAddress(a) {
			forceDirectConnCtx := network.WithForceDirectDial(hp.ctx, "hole-punching")
			dialCtx, cancel := context.WithTimeout(forceDirectConnCtx, dialTimeout)
			tstart := time.Now()
			// This dials *all* public addresses from the peerstore.
			err := hp.host.Connect(dialCtx, peer.AddrInfo{ID: rp})
			dt := time.Since(tstart)
			cancel()
			if err != nil {
				hp.tracer.DirectDialFailed(rp, dt, err)
				// The single Connect above already covered every address, so
				// there is no point in looking at the remaining ones.
				break
			}
			hp.tracer.DirectDialSuccessful(rp, dt)
			log.Debugw("direct connection to peer successful, no need for a hole punch", "peer", rp)
			return nil
		}
	}
	log.Debugw("got inbound proxy conn", "peer", rp)
	// hole punch
	for i := 1; i <= maxRetries; i++ {
		addrs, obsAddrs, rtt, err := hp.initiateHolePunch(rp)
		if err != nil {
			// A failed coordination aborts the whole attempt; it is not retried.
			log.Debugw("hole punching failed", "peer", rp, "error", err)
			hp.tracer.ProtocolError(rp, err)
			return err
		}
		synTime := rtt / 2
		log.Debugf("peer RTT is %s; starting hole punch in %s", rtt, synTime)
		// wait for sync to reach the other peer and then punch a hole for it in our NAT
		// by attempting a connect to it.
		timer := time.NewTimer(synTime)
		select {
		case start := <-timer.C:
			pi := peer.AddrInfo{
				ID: rp,
				Addrs: addrs,
			}
			hp.tracer.StartHolePunch(rp, addrs, rtt)
			hp.tracer.HolePunchAttempt(pi.ID)
			err := holePunchConnect(hp.ctx, hp.host, pi, true)
			dt := time.Since(start)
			hp.tracer.EndHolePunch(rp, dt, err)
			if err == nil {
				log.Debugw("hole punching with successful", "peer", rp, "time", dt)
				hp.tracer.HolePunchFinished("initiator", i, addrs, obsAddrs, getDirectConnection(hp.host, rp))
				return nil
			}
		case <-hp.ctx.Done():
			timer.Stop()
			return hp.ctx.Err()
		}
		// Only the last failed round is reported as a finished (failed) hole punch.
		if i == maxRetries {
			hp.tracer.HolePunchFinished("initiator", maxRetries, addrs, obsAddrs, nil)
		}
	}
	return fmt.Errorf("all retries for hole punch with peer %s failed", rp)
}
// initiateHolePunch opens a new hole punching coordination stream to rp over
// an existing connection (no new dial is made), exchanges the addresses and
// measures the RTT.
//
// It returns the remote peer's addresses, our own observed addresses, the
// measured RTT and an error. On error the stream is reset.
func (hp *holePuncher) initiateHolePunch(rp peer.ID) ([]ma.Multiaddr, []ma.Multiaddr, time.Duration, error) {
	streamCtx := network.WithNoDial(network.WithUseTransient(hp.ctx, "hole-punch"), "hole-punch")

	str, err := hp.host.NewStream(streamCtx, rp, Protocol)
	if err != nil {
		return nil, nil, 0, fmt.Errorf("failed to open hole-punching stream: %w", err)
	}
	defer str.Close()

	remoteAddrs, ownAddrs, rtt, err := hp.initiateHolePunchImpl(str)
	if err != nil {
		log.Debugf("%s", err)
		str.Reset()
	}
	return remoteAddrs, ownAddrs, rtt, err
}
// initiateHolePunchImpl runs the initiator side of the coordination exchange
// on an already opened stream:
//
//  1. attach the stream to the holepunch service and reserve maxMsgSize bytes,
//  2. send CONNECT with our (filtered) observed, non-relay addresses,
//  3. read the peer's CONNECT and take the elapsed time as the RTT,
//  4. send SYNC.
//
// It returns the peer's (filtered) non-relay addresses, the addresses we sent,
// and the RTT. Every failure is returned as an error; underlying errors are
// wrapped with %w so callers can inspect them with errors.Is / errors.As.
func (hp *holePuncher) initiateHolePunchImpl(str network.Stream) ([]ma.Multiaddr, []ma.Multiaddr, time.Duration, error) {
	if err := str.Scope().SetService(ServiceName); err != nil {
		return nil, nil, 0, fmt.Errorf("error attaching stream to holepunch service: %w", err)
	}

	if err := str.Scope().ReserveMemory(maxMsgSize, network.ReservationPriorityAlways); err != nil {
		return nil, nil, 0, fmt.Errorf("error reserving memory for stream: %w", err)
	}
	defer str.Scope().ReleaseMemory(maxMsgSize)

	w := pbio.NewDelimitedWriter(str)
	rd := pbio.NewDelimitedReader(str, maxMsgSize)

	// One deadline covers the whole exchange on this stream.
	str.SetDeadline(time.Now().Add(StreamTimeout))

	// send a CONNECT and start RTT measurement.
	obsAddrs := removeRelayAddrs(hp.ids.OwnObservedAddrs())
	if hp.filter != nil {
		obsAddrs = hp.filter.FilterLocal(str.Conn().RemotePeer(), obsAddrs)
	}
	if len(obsAddrs) == 0 {
		return nil, nil, 0, errors.New("aborting hole punch initiation as we have no public address")
	}

	start := time.Now()
	if err := w.WriteMsg(&pb.HolePunch{
		Type:     pb.HolePunch_CONNECT.Enum(),
		ObsAddrs: addrsToBytes(obsAddrs),
	}); err != nil {
		str.Reset()
		return nil, nil, 0, err
	}

	// wait for a CONNECT message from the remote peer
	var msg pb.HolePunch
	if err := rd.ReadMsg(&msg); err != nil {
		return nil, nil, 0, fmt.Errorf("failed to read CONNECT message from remote peer: %w", err)
	}
	rtt := time.Since(start)
	if t := msg.GetType(); t != pb.HolePunch_CONNECT {
		return nil, nil, 0, fmt.Errorf("expect CONNECT message, got %s", t)
	}

	addrs := removeRelayAddrs(addrsFromBytes(msg.ObsAddrs))
	if hp.filter != nil {
		addrs = hp.filter.FilterRemote(str.Conn().RemotePeer(), addrs)
	}
	if len(addrs) == 0 {
		return nil, nil, 0, errors.New("didn't receive any public addresses in CONNECT")
	}

	if err := w.WriteMsg(&pb.HolePunch{Type: pb.HolePunch_SYNC.Enum()}); err != nil {
		return nil, nil, 0, fmt.Errorf("failed to send SYNC message for hole punching: %w", err)
	}
	return addrs, obsAddrs, rtt, nil
}
// Close shuts the hole puncher down: it marks it closed so that no new
// attempts are accepted, cancels its context and waits for the goroutines
// tracked by refCount. It always returns nil.
func (hp *holePuncher) Close() error {
	markClosed := func() {
		hp.closeMx.Lock()
		defer hp.closeMx.Unlock()
		hp.closed = true
	}
	markClosed()

	hp.ctxCancel()
	hp.refCount.Wait()
	return nil
}
// netNotifiee is the holePuncher in its role as a network notifiee.
type netNotifiee holePuncher

// Connected reacts to a new connection. For an inbound connection that came
// in through a relay it waits, in a tracked goroutine, until Identify has
// finished on the connection and then tries to get a direct connection to the
// remote peer. All other connections are ignored.
func (nn *netNotifiee) Connected(_ network.Network, conn network.Conn) {
	hs := (*holePuncher)(nn)

	// Hole punch only if it's an inbound proxy connection.
	// If we already have a direct connection with the remote peer, this will be a no-op.
	if conn.Stat().Direction != network.DirInbound || !isRelayAddress(conn.RemoteMultiaddr()) {
		return
	}

	hs.refCount.Add(1)
	go func() {
		defer hs.refCount.Done()

		// waiting for Identify here will allow us to access the peer's public and observed addresses
		// that we can dial to for a hole punch.
		select {
		case <-hs.ids.IdentifyWait(conn):
		case <-hs.ctx.Done():
			return
		}

		_ = hs.DirectConnect(conn.RemotePeer())
	}()
}
// Disconnected is a no-op; the hole puncher only reacts to new connections.
func (nn *netNotifiee) Disconnected(_ network.Network, v network.Conn) {}

// Listen is a no-op.
func (nn *netNotifiee) Listen(n network.Network, a ma.Multiaddr) {}

// ListenClose is a no-op.
func (nn *netNotifiee) ListenClose(n network.Network, a ma.Multiaddr) {}

View File

@@ -0,0 +1,187 @@
package holepunch
import (
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/p2p/metricshelper"
ma "github.com/multiformats/go-multiaddr"
"github.com/prometheus/client_golang/prometheus"
)
// metricNamespace is the Prometheus namespace shared by all hole punch metrics.
const metricNamespace = "libp2p_holepunch"

var (
	// directDialsTotal counts direct dial attempts, labelled by outcome.
	directDialsTotal = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: metricNamespace,
			Name: "direct_dials_total",
			Help: "Direct Dials Total",
		},
		[]string{"outcome"},
	)
	// hpAddressOutcomesTotal counts hole punch outcomes per address involved,
	// labelled by side, attempt number, IP version, transport and outcome.
	hpAddressOutcomesTotal = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: metricNamespace,
			Name: "address_outcomes_total",
			Help: "Hole Punch outcomes by Transport",
		},
		[]string{"side", "num_attempts", "ipv", "transport", "outcome"},
	)
	// hpOutcomesTotal counts the overall outcome of each finished hole punch,
	// labelled by side, attempt number and outcome.
	hpOutcomesTotal = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: metricNamespace,
			Name: "outcomes_total",
			Help: "Hole Punch outcomes overall",
		},
		[]string{"side", "num_attempts", "outcome"},
	)

	// collectors lists every metric that NewMetricsTracer registers.
	collectors = []prometheus.Collector{
		directDialsTotal,
		hpAddressOutcomesTotal,
		hpOutcomesTotal,
	}
)
// MetricsTracer receives the outcomes of hole punches and direct dials so that
// they can be recorded as metrics.
type MetricsTracer interface {
	// HolePunchFinished reports a completed hole punch; directConn is nil when
	// no direct connection was established.
	HolePunchFinished(side string, attemptNum int, theirAddrs []ma.Multiaddr, ourAddr []ma.Multiaddr, directConn network.ConnMultiaddrs)
	// DirectDialFinished reports whether a direct dial succeeded.
	DirectDialFinished(success bool)
}

// metricsTracer is the Prometheus-backed MetricsTracer returned by NewMetricsTracer.
type metricsTracer struct{}

// Compile-time check that *metricsTracer implements MetricsTracer.
var _ MetricsTracer = &metricsTracer{}

// metricsTracerSetting holds the configuration collected from MetricsTracerOptions.
type metricsTracerSetting struct {
	reg prometheus.Registerer
}

// MetricsTracerOption customises the settings used by NewMetricsTracer.
type MetricsTracerOption func(*metricsTracerSetting)
// WithRegisterer makes NewMetricsTracer register its collectors with reg.
// A nil reg is ignored, leaving the previously configured registerer in place.
func WithRegisterer(reg prometheus.Registerer) MetricsTracerOption {
	return func(s *metricsTracerSetting) {
		if reg == nil {
			return
		}
		s.reg = reg
	}
}
// NewMetricsTracer returns a MetricsTracer backed by the package's Prometheus
// collectors. The collectors are registered with prometheus.DefaultRegisterer
// unless WithRegisterer says otherwise, and the known label combinations are
// pre-created.
func NewMetricsTracer(opts ...MetricsTracerOption) MetricsTracer {
	cfg := metricsTracerSetting{reg: prometheus.DefaultRegisterer}
	for i := range opts {
		opts[i](&cfg)
	}
	metricshelper.RegisterCollectors(cfg.reg, collectors...)

	var (
		sides      = []string{"initiator", "receiver"}
		attempts   = []string{"1", "2", "3", "4"}
		outcomes   = []string{"success", "failed", "cancelled", "no_suitable_address"}
		ipVersions = []string{"ip4", "ip6"}
		transports = []string{"quic", "quic-v1", "tcp", "webtransport"}
	)

	// initialise metrics's labels so that the first data point is handled correctly
	for _, side := range sides {
		for _, attempt := range attempts {
			for _, outcome := range outcomes {
				for _, ipv := range ipVersions {
					for _, transport := range transports {
						hpAddressOutcomesTotal.WithLabelValues(side, attempt, ipv, transport, outcome)
					}
				}
				// "cancelled" is not a valid outcome for the overall holepunch metric
				if outcome != "cancelled" {
					hpOutcomesTotal.WithLabelValues(side, attempt, outcome)
				}
			}
		}
	}
	return &metricsTracer{}
}
// HolePunchFinished records the metrics for a completed holepunch. One data
// point is recorded for the holepunch as a whole and one for every local
// address involved in it.
//
// The outcome for a local address is one of:
//
//   - success:
//     A direct connection was established with the peer using this address
//   - cancelled:
//     A direct connection was established with the peer but not using this address
//   - failed:
//     No direct connection was made to the peer and the peer reported an address
//     with the same transport as this address
//   - no_suitable_address:
//     The peer reported no address with the same transport as this address
//
// The overall outcome is "success" when directConn is non-nil,
// "no_suitable_address" when no local address matched any remote address, and
// "failed" otherwise.
func (mt *metricsTracer) HolePunchFinished(side string, numAttempts int,
	remoteAddrs []ma.Multiaddr, localAddrs []ma.Multiaddr, directConn network.ConnMultiaddrs) {
	tags := metricshelper.GetStringSlice()
	defer metricshelper.PutStringSlice(tags)

	// The first two labels (side, numAttempts) are shared by every data point.
	*tags = append(*tags, side, getNumAttemptString(numAttempts))

	connected := directConn != nil
	var connIPV, connTransport string
	if connected {
		connIPV = metricshelper.GetIPVersion(directConn.LocalMultiaddr())
		connTransport = metricshelper.GetTransport(directConn.LocalMultiaddr())
	}

	matches := 0
	// calculate holepunch outcome for all the addresses involved
	for _, local := range localAddrs {
		localIPV := metricshelper.GetIPVersion(local)
		localTransport := metricshelper.GetTransport(local)

		found := false
		for _, remote := range remoteAddrs {
			remoteIPV := metricshelper.GetIPVersion(remote)
			remoteTransport := metricshelper.GetTransport(remote)
			if remoteIPV != localIPV || remoteTransport != localTransport {
				continue
			}

			// the peer reported an address with the same transport
			found = true
			matches++

			var addrOutcome string
			switch {
			case connected && connIPV == remoteIPV && connTransport == remoteTransport:
				// the connection was made using this address
				addrOutcome = "success"
			case connected:
				// connection was made but not using this address
				addrOutcome = "cancelled"
			default:
				// no connection was made
				addrOutcome = "failed"
			}
			*tags = append(*tags, remoteIPV, remoteTransport, addrOutcome)
			hpAddressOutcomesTotal.WithLabelValues(*tags...).Inc()
			*tags = (*tags)[:2] // 2 because we want to keep (side, numAttempts)
			// Only the first matching remote address counts for this local address.
			break
		}

		if !found {
			*tags = append(*tags, localIPV, localTransport, "no_suitable_address")
			hpAddressOutcomesTotal.WithLabelValues(*tags...).Inc()
			*tags = (*tags)[:2] // 2 because we want to keep (side, numAttempts)
		}
	}

	overall := "failed"
	switch {
	case connected:
		overall = "success"
	case matches == 0:
		// there were no matching addresses, this attempt was going to fail
		overall = "no_suitable_address"
	}

	*tags = append(*tags, overall)
	hpOutcomesTotal.WithLabelValues(*tags...).Inc()
}
// getNumAttemptString maps an attempt number to the label value used for the
// "num_attempts" metric label.
//
// Values 0 through 5 map to their decimal string and anything larger maps to
// "> 5". Negative values are clamped to "0" instead of indexing out of range,
// so a bad input from a caller cannot panic the metrics path.
func getNumAttemptString(numAttempt int) string {
	attemptStr := [...]string{"0", "1", "2", "3", "4", "5"}
	switch {
	case numAttempt < 0:
		return attemptStr[0]
	case numAttempt >= len(attemptStr):
		return "> 5"
	}
	return attemptStr[numAttempt]
}
// DirectDialFinished counts one direct dial, labelled "success" or "failed"
// depending on success.
func (mt *metricsTracer) DirectDialFinished(success bool) {
	tags := metricshelper.GetStringSlice()
	defer metricshelper.PutStringSlice(tags)

	outcome := "failed"
	if success {
		outcome = "success"
	}
	*tags = append(*tags, outcome)
	directDialsTotal.WithLabelValues(*tags...).Inc()
}

View File

@@ -0,0 +1,215 @@
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.30.0
// protoc v3.21.12
// source: pb/holepunch.proto
package pb
import (
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
sync "sync"
)
const (
// Verify that this generated code is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
// Verify that runtime/protoimpl is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)
type HolePunch_Type int32
const (
HolePunch_CONNECT HolePunch_Type = 100
HolePunch_SYNC HolePunch_Type = 300
)
// Enum value maps for HolePunch_Type.
var (
HolePunch_Type_name = map[int32]string{
100: "CONNECT",
300: "SYNC",
}
HolePunch_Type_value = map[string]int32{
"CONNECT": 100,
"SYNC": 300,
}
)
func (x HolePunch_Type) Enum() *HolePunch_Type {
p := new(HolePunch_Type)
*p = x
return p
}
func (x HolePunch_Type) String() string {
return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x))
}
func (HolePunch_Type) Descriptor() protoreflect.EnumDescriptor {
return file_pb_holepunch_proto_enumTypes[0].Descriptor()
}
func (HolePunch_Type) Type() protoreflect.EnumType {
return &file_pb_holepunch_proto_enumTypes[0]
}
func (x HolePunch_Type) Number() protoreflect.EnumNumber {
return protoreflect.EnumNumber(x)
}
// Deprecated: Do not use.
func (x *HolePunch_Type) UnmarshalJSON(b []byte) error {
num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b)
if err != nil {
return err
}
*x = HolePunch_Type(num)
return nil
}
// Deprecated: Use HolePunch_Type.Descriptor instead.
func (HolePunch_Type) EnumDescriptor() ([]byte, []int) {
return file_pb_holepunch_proto_rawDescGZIP(), []int{0, 0}
}
// spec: https://github.com/libp2p/specs/blob/master/relay/DCUtR.md
type HolePunch struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
Type *HolePunch_Type `protobuf:"varint,1,req,name=type,enum=holepunch.pb.HolePunch_Type" json:"type,omitempty"`
ObsAddrs [][]byte `protobuf:"bytes,2,rep,name=ObsAddrs" json:"ObsAddrs,omitempty"`
}
func (x *HolePunch) Reset() {
*x = HolePunch{}
if protoimpl.UnsafeEnabled {
mi := &file_pb_holepunch_proto_msgTypes[0]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *HolePunch) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*HolePunch) ProtoMessage() {}
func (x *HolePunch) ProtoReflect() protoreflect.Message {
mi := &file_pb_holepunch_proto_msgTypes[0]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use HolePunch.ProtoReflect.Descriptor instead.
func (*HolePunch) Descriptor() ([]byte, []int) {
return file_pb_holepunch_proto_rawDescGZIP(), []int{0}
}
func (x *HolePunch) GetType() HolePunch_Type {
if x != nil && x.Type != nil {
return *x.Type
}
return HolePunch_CONNECT
}
func (x *HolePunch) GetObsAddrs() [][]byte {
if x != nil {
return x.ObsAddrs
}
return nil
}
var File_pb_holepunch_proto protoreflect.FileDescriptor
var file_pb_holepunch_proto_rawDesc = []byte{
0x0a, 0x12, 0x70, 0x62, 0x2f, 0x68, 0x6f, 0x6c, 0x65, 0x70, 0x75, 0x6e, 0x63, 0x68, 0x2e, 0x70,
0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0c, 0x68, 0x6f, 0x6c, 0x65, 0x70, 0x75, 0x6e, 0x63, 0x68, 0x2e,
0x70, 0x62, 0x22, 0x79, 0x0a, 0x09, 0x48, 0x6f, 0x6c, 0x65, 0x50, 0x75, 0x6e, 0x63, 0x68, 0x12,
0x30, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x02, 0x28, 0x0e, 0x32, 0x1c, 0x2e,
0x68, 0x6f, 0x6c, 0x65, 0x70, 0x75, 0x6e, 0x63, 0x68, 0x2e, 0x70, 0x62, 0x2e, 0x48, 0x6f, 0x6c,
0x65, 0x50, 0x75, 0x6e, 0x63, 0x68, 0x2e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70,
0x65, 0x12, 0x1a, 0x0a, 0x08, 0x4f, 0x62, 0x73, 0x41, 0x64, 0x64, 0x72, 0x73, 0x18, 0x02, 0x20,
0x03, 0x28, 0x0c, 0x52, 0x08, 0x4f, 0x62, 0x73, 0x41, 0x64, 0x64, 0x72, 0x73, 0x22, 0x1e, 0x0a,
0x04, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54,
0x10, 0x64, 0x12, 0x09, 0x0a, 0x04, 0x53, 0x59, 0x4e, 0x43, 0x10, 0xac, 0x02,
}
var (
file_pb_holepunch_proto_rawDescOnce sync.Once
file_pb_holepunch_proto_rawDescData = file_pb_holepunch_proto_rawDesc
)
func file_pb_holepunch_proto_rawDescGZIP() []byte {
file_pb_holepunch_proto_rawDescOnce.Do(func() {
file_pb_holepunch_proto_rawDescData = protoimpl.X.CompressGZIP(file_pb_holepunch_proto_rawDescData)
})
return file_pb_holepunch_proto_rawDescData
}
var file_pb_holepunch_proto_enumTypes = make([]protoimpl.EnumInfo, 1)
var file_pb_holepunch_proto_msgTypes = make([]protoimpl.MessageInfo, 1)
var file_pb_holepunch_proto_goTypes = []interface{}{
(HolePunch_Type)(0), // 0: holepunch.pb.HolePunch.Type
(*HolePunch)(nil), // 1: holepunch.pb.HolePunch
}
var file_pb_holepunch_proto_depIdxs = []int32{
0, // 0: holepunch.pb.HolePunch.type:type_name -> holepunch.pb.HolePunch.Type
1, // [1:1] is the sub-list for method output_type
1, // [1:1] is the sub-list for method input_type
1, // [1:1] is the sub-list for extension type_name
1, // [1:1] is the sub-list for extension extendee
0, // [0:1] is the sub-list for field type_name
}
func init() { file_pb_holepunch_proto_init() }
func file_pb_holepunch_proto_init() {
if File_pb_holepunch_proto != nil {
return
}
if !protoimpl.UnsafeEnabled {
file_pb_holepunch_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*HolePunch); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
}
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: file_pb_holepunch_proto_rawDesc,
NumEnums: 1,
NumMessages: 1,
NumExtensions: 0,
NumServices: 0,
},
GoTypes: file_pb_holepunch_proto_goTypes,
DependencyIndexes: file_pb_holepunch_proto_depIdxs,
EnumInfos: file_pb_holepunch_proto_enumTypes,
MessageInfos: file_pb_holepunch_proto_msgTypes,
}.Build()
File_pb_holepunch_proto = out.File
file_pb_holepunch_proto_rawDesc = nil
file_pb_holepunch_proto_goTypes = nil
file_pb_holepunch_proto_depIdxs = nil
}

View File

@@ -0,0 +1,14 @@
syntax = "proto2";
package holepunch.pb;
// HolePunch is the message exchanged on a hole punching coordination stream.
// spec: https://github.com/libp2p/specs/blob/master/relay/DCUtR.md
message HolePunch {
  // Type tells which step of the exchange a message belongs to.
  enum Type {
    CONNECT = 100;
    SYNC = 300;
  }
  // type is mandatory on every message.
  required Type type=1;
  // ObsAddrs is a list of addresses, each one carried as raw bytes.
  repeated bytes ObsAddrs = 2;
}

View File

@@ -0,0 +1,285 @@
package holepunch
import (
"context"
"errors"
"fmt"
"sync"
"time"
logging "github.com/ipfs/go-log/v2"
"github.com/libp2p/go-libp2p/core/event"
"github.com/libp2p/go-libp2p/core/host"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/libp2p/go-libp2p/core/protocol"
"github.com/libp2p/go-libp2p/p2p/host/eventbus"
"github.com/libp2p/go-libp2p/p2p/protocol/holepunch/pb"
"github.com/libp2p/go-libp2p/p2p/protocol/identify"
"github.com/libp2p/go-msgio/pbio"
ma "github.com/multiformats/go-multiaddr"
)
// Protocol is the libp2p protocol for Hole Punching.
const Protocol protocol.ID = "/libp2p/dcutr"

// log is the logger used throughout the package.
var log = logging.Logger("p2p-holepunch")

// StreamTimeout is the timeout for the hole punch protocol stream.
var StreamTimeout = 1 * time.Minute

const (
	// ServiceName is the service name that hole punch streams are attached to
	// in their resource scope.
	ServiceName = "libp2p.holepunch"
	// maxMsgSize is the maximum size of a message read from a hole punch
	// stream; the same amount of memory is reserved on the stream's scope.
	maxMsgSize = 4 * 1024 // 4K
)

// ErrClosed is returned when the hole punching is closed
var ErrClosed = errors.New("hole punching service closing")

// Option configures a Service. Options are applied in order by NewService,
// and the first one returning an error aborts construction.
type Option func(*Service) error
// The Service runs on every node that supports the DCUtR protocol.
type Service struct {
	// ctx is cancelled by Close (via ctxCancel) and stops watchForPublicAddr.
	ctx context.Context
	ctxCancel context.CancelFunc
	host host.Host
	ids identify.IDService
	// holePuncher is created by watchForPublicAddr once we learn that we are
	// privately reachable; it is nil until then. Guarded by holePuncherMx.
	holePuncherMx sync.Mutex
	holePuncher *holePuncher
	// hasPublicAddrsChan is closed right after holePuncher has been set;
	// DirectConnect waits on it.
	hasPublicAddrsChan chan struct{}
	// tracer is set by the tracer options and stays nil if none was given.
	tracer *tracer
	// filter is set by WithAddrFilter and stays nil if it was not given.
	filter AddrFilter
	// refCount tracks the watchForPublicAddr goroutine so Close can wait for it.
	refCount sync.WaitGroup
}
// NewService creates a new service that can be used for hole punching.
// The Service runs on all hosts that support the DCUtR protocol,
// no matter if they are behind a NAT / firewall or not.
// The Service handles DCUtR streams (which are initiated from the node behind
// a NAT / Firewall once we establish a connection to them through a relay).
//
// ids must not be nil. The options are applied in order; if one of them fails,
// its error is returned and no service is started.
func NewService(h host.Host, ids identify.IDService, opts ...Option) (*Service, error) {
	if ids == nil {
		return nil, errors.New("identify service can't be nil")
	}

	ctx, cancel := context.WithCancel(context.Background())
	svc := &Service{
		ctx:                ctx,
		ctxCancel:          cancel,
		host:               h,
		ids:                ids,
		hasPublicAddrsChan: make(chan struct{}),
	}

	for _, apply := range opts {
		err := apply(svc)
		if err != nil {
			cancel()
			return nil, err
		}
	}
	svc.tracer.Start()

	// The watcher goroutine is tracked so that Close can wait for it.
	svc.refCount.Add(1)
	go svc.watchForPublicAddr()

	return svc, nil
}
// watchForPublicAddr runs in its own goroutine (started by NewService) and
// brings the service up in two stages:
//
//  1. It polls the identify service until we observe at least one public
//     address for ourselves, then registers the DCUtR stream handler.
//  2. It waits for a reachability event saying that we are private (behind a
//     NAT / firewall), creates the holePuncher, and closes hasPublicAddrsChan
//     to unblock DirectConnect.
//
// It returns early if the service context is cancelled or the event
// subscription cannot be set up.
func (s *Service) watchForPublicAddr() {
	defer s.refCount.Done()

	// Debugw, not Debug: the trailing arguments are a structured key/value pair.
	log.Debugw("waiting until we have at least one public address", "peer", s.host.ID())

	// TODO: We should have an event here that fires when identify discovers a new
	// address (and when autonat confirms that address).
	// As we currently don't have an event like this, just check our observed addresses
	// regularly (exponential backoff starting at 250 ms, capped at 5s).
	duration := 250 * time.Millisecond
	const maxDuration = 5 * time.Second
	t := time.NewTimer(duration)
	defer t.Stop()
	for {
		if containsPublicAddr(s.ids.OwnObservedAddrs()) {
			log.Debug("Host now has a public address. Starting holepunch protocol.")
			s.host.SetStreamHandler(Protocol, s.handleNewStream)
			break
		}

		select {
		case <-s.ctx.Done():
			return
		case <-t.C:
			duration *= 2
			if duration > maxDuration {
				duration = maxDuration
			}
			t.Reset(duration)
		}
	}

	// Only start the holePuncher if we're behind a NAT / firewall.
	sub, err := s.host.EventBus().Subscribe(&event.EvtLocalReachabilityChanged{}, eventbus.Name("holepunch"))
	if err != nil {
		log.Debugf("failed to subscripe to Reachability event: %s", err)
		return
	}
	defer sub.Close()
	for {
		select {
		case <-s.ctx.Done():
			return
		case e, ok := <-sub.Out():
			if !ok {
				return
			}
			if e.(event.EvtLocalReachabilityChanged).Reachability != network.ReachabilityPrivate {
				continue
			}
			s.holePuncherMx.Lock()
			s.holePuncher = newHolePuncher(s.host, s.ids, s.tracer, s.filter)
			s.holePuncherMx.Unlock()
			// Signal DirectConnect that the holePuncher is now available.
			close(s.hasPublicAddrsChan)
			return
		}
	}
}
// Close closes the Hole Punch Service: it closes the holePuncher (if one was
// created), closes the tracer, removes the stream handler, cancels the service
// context and waits for the background goroutine. The returned error is the
// one from closing the holePuncher.
func (s *Service) Close() error {
	closePuncher := func() error {
		s.holePuncherMx.Lock()
		defer s.holePuncherMx.Unlock()
		if s.holePuncher == nil {
			return nil
		}
		return s.holePuncher.Close()
	}
	err := closePuncher()

	s.tracer.Close()
	s.host.RemoveStreamHandler(Protocol)
	s.ctxCancel()
	s.refCount.Wait()
	return err
}
// incomingHolePunch runs the receiver side of the coordination exchange on str:
// it reads the initiator's CONNECT, answers with a CONNECT carrying our own
// (filtered) observed non-relay addresses, and waits for SYNC.
//
// It returns the time between sending our CONNECT and receiving SYNC (rtt),
// the initiator's (filtered) non-relay addresses, and the addresses we sent.
// It fails if the stream did not arrive over a relayed connection, if either
// side has no usable address, if memory cannot be reserved, or if a message
// cannot be read/written or has an unexpected type.
func (s *Service) incomingHolePunch(str network.Stream) (rtt time.Duration, remoteAddrs []ma.Multiaddr, ownAddrs []ma.Multiaddr, err error) {
	// sanity check: a hole punch request should only come from peers behind a relay
	if !isRelayAddress(str.Conn().RemoteMultiaddr()) {
		return 0, nil, nil, fmt.Errorf("received hole punch stream: %s", str.Conn().RemoteMultiaddr())
	}
	ownAddrs = removeRelayAddrs(s.ids.OwnObservedAddrs())
	if s.filter != nil {
		ownAddrs = s.filter.FilterLocal(str.Conn().RemotePeer(), ownAddrs)
	}

	// If we can't tell the peer where to dial us, there's no point in starting the hole punching.
	if len(ownAddrs) == 0 {
		return 0, nil, nil, errors.New("rejecting hole punch request, as we don't have any public addresses")
	}

	if err := str.Scope().ReserveMemory(maxMsgSize, network.ReservationPriorityAlways); err != nil {
		// err is passed as the format argument; it used to sit inside the
		// string literal, so the actual error was never logged.
		log.Debugf("error reserving memory for stream: %s", err)
		return 0, nil, nil, err
	}
	defer str.Scope().ReleaseMemory(maxMsgSize)

	wr := pbio.NewDelimitedWriter(str)
	rd := pbio.NewDelimitedReader(str, maxMsgSize)

	// Read Connect message
	msg := new(pb.HolePunch)

	// One deadline covers the whole exchange on this stream.
	str.SetDeadline(time.Now().Add(StreamTimeout))

	if err := rd.ReadMsg(msg); err != nil {
		return 0, nil, nil, fmt.Errorf("failed to read message from initator: %w", err)
	}
	if t := msg.GetType(); t != pb.HolePunch_CONNECT {
		return 0, nil, nil, fmt.Errorf("expected CONNECT message from initiator but got %d", t)
	}

	obsDial := removeRelayAddrs(addrsFromBytes(msg.ObsAddrs))
	if s.filter != nil {
		obsDial = s.filter.FilterRemote(str.Conn().RemotePeer(), obsDial)
	}

	log.Debugw("received hole punch request", "peer", str.Conn().RemotePeer(), "addrs", obsDial)
	if len(obsDial) == 0 {
		return 0, nil, nil, errors.New("expected CONNECT message to contain at least one address")
	}

	// Write CONNECT message
	msg.Reset()
	msg.Type = pb.HolePunch_CONNECT.Enum()
	msg.ObsAddrs = addrsToBytes(ownAddrs)
	tstart := time.Now()
	if err := wr.WriteMsg(msg); err != nil {
		return 0, nil, nil, fmt.Errorf("failed to write CONNECT message to initator: %w", err)
	}

	// Read SYNC message
	msg.Reset()
	if err := rd.ReadMsg(msg); err != nil {
		return 0, nil, nil, fmt.Errorf("failed to read message from initator: %w", err)
	}
	if t := msg.GetType(); t != pb.HolePunch_SYNC {
		return 0, nil, nil, fmt.Errorf("expected SYNC message from initiator but got %d", t)
	}
	return time.Since(tstart), obsDial, ownAddrs, nil
}
// handleNewStream is the stream handler for the DCUtR protocol. It runs the
// receiver side of the coordination via incomingHolePunch and, if that
// succeeds, immediately dials the initiator's addresses to punch the hole.
// The outcome is reported to the tracer; nothing is returned.
func (s *Service) handleNewStream(str network.Stream) {
	// Check directionality of the underlying connection.
	// Peer A receives an inbound connection from peer B.
	// Peer A opens a new hole punch stream to peer B.
	// Peer B receives this stream, calling this function.
	// Peer B sees the underlying connection as an outbound connection.
	if str.Conn().Stat().Direction == network.DirInbound {
		str.Reset()
		return
	}

	if setErr := str.Scope().SetService(ServiceName); setErr != nil {
		log.Debugf("error attaching stream to holepunch service: %s", setErr)
		str.Reset()
		return
	}

	remote := str.Conn().RemotePeer()
	rtt, remoteAddrs, localAddrs, err := s.incomingHolePunch(str)
	if err != nil {
		s.tracer.ProtocolError(remote, err)
		log.Debugw("error handling holepunching stream from", "peer", remote, "error", err)
		str.Reset()
		return
	}
	str.Close()

	// Hole punch now by forcing a connect
	target := peer.AddrInfo{ID: remote, Addrs: remoteAddrs}

	s.tracer.StartHolePunch(remote, remoteAddrs, rtt)
	log.Debugw("starting hole punch", "peer", remote)
	begin := time.Now()
	s.tracer.HolePunchAttempt(target.ID)
	err = holePunchConnect(s.ctx, s.host, target, false)
	elapsed := time.Since(begin)
	s.tracer.EndHolePunch(remote, elapsed, err)
	s.tracer.HolePunchFinished("receiver", 1, remoteAddrs, localAddrs, getDirectConnection(s.host, remote))
}
// DirectConnect is only exposed for testing purposes.
// TODO: find a solution for this.
//
// It blocks until the holePuncher has been created and then delegates to it.
// If the service is closed before that happens, it returns ErrClosed instead
// of blocking forever.
func (s *Service) DirectConnect(p peer.ID) error {
	select {
	case <-s.hasPublicAddrsChan:
	case <-s.ctx.Done():
		return ErrClosed
	}
	s.holePuncherMx.Lock()
	holePuncher := s.holePuncher
	s.holePuncherMx.Unlock()
	return holePuncher.DirectConnect(p)
}

View File

@@ -0,0 +1,297 @@
package holepunch
import (
"context"
"sync"
"time"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/peer"
ma "github.com/multiformats/go-multiaddr"
)
// NOTE(review): neither constant is referenced in the visible part of this
// file. Presumably they control how often, and after what age, entries in
// tracer.peers are garbage-collected — confirm against the rest of the file.
const (
	tracerGCInterval = 2 * time.Minute
	tracerCacheDuration = 5 * time.Minute
)
// WithTracer enables holepunch tracing with EventTracer et.
// The resulting tracer has no MetricsTracer; it replaces any tracer set by an
// earlier option.
func WithTracer(et EventTracer) Option {
	return func(hps *Service) error {
		tr := new(tracer)
		tr.et = et
		tr.self = hps.host.ID()
		tr.peers = map[peer.ID]struct {
			counter int
			last    time.Time
		}{}
		hps.tracer = tr
		return nil
	}
}
// WithMetricsTracer enables holepunch tracing with MetricsTracer mt.
// The resulting tracer has no EventTracer; it replaces any tracer set by an
// earlier option.
func WithMetricsTracer(mt MetricsTracer) Option {
	return func(hps *Service) error {
		tr := new(tracer)
		tr.mt = mt
		tr.self = hps.host.ID()
		tr.peers = map[peer.ID]struct {
			counter int
			last    time.Time
		}{}
		hps.tracer = tr
		return nil
	}
}
// WithMetricsAndEventTracer enables holepunch tracing with both MetricsTracer
// mt and EventTracer et. It replaces any tracer set by an earlier option.
func WithMetricsAndEventTracer(mt MetricsTracer, et EventTracer) Option {
	return func(hps *Service) error {
		tr := new(tracer)
		tr.et = et
		tr.mt = mt
		tr.self = hps.host.ID()
		tr.peers = map[peer.ID]struct {
			counter int
			last    time.Time
		}{}
		hps.tracer = tr
		return nil
	}
}
// tracer forwards hole punching events to an optional EventTracer and an
// optional MetricsTracer.
type tracer struct {
	// et receives the events; nil when only metrics were requested.
	et EventTracer
	// mt receives the metrics; nil when only event tracing was requested.
	mt MetricsTracer
	// self is the local host's peer ID, reported as Event.Peer.
	self peer.ID
	refCount sync.WaitGroup
	ctx context.Context
	ctxCancel context.CancelFunc
	mutex sync.Mutex
	// peers keeps a counter and a timestamp per remote peer.
	// NOTE(review): the code that maintains it is outside this view;
	// presumably it is guarded by mutex — confirm.
	peers map[peer.ID]struct {
		counter int
		last time.Time
	}
}
// EventTracer is the sink for hole punching events.
type EventTracer interface {
	// Trace is called once for every event.
	Trace(evt *Event)
}

// Event is the envelope handed to an EventTracer. Evt holds a pointer to one
// of the *Evt payload structs, selected by Type.
type Event struct {
	Timestamp int64 // UNIX nanos
	Peer peer.ID // local peer ID
	Remote peer.ID // remote peer ID
	Type string // event type
	Evt interface{} // the actual event
}
// Event Types
//
// These are the values used for Event.Type.
const (
	DirectDialEvtT = "DirectDial"
	ProtocolErrorEvtT = "ProtocolError"
	StartHolePunchEvtT = "StartHolePunch"
	EndHolePunchEvtT = "EndHolePunch"
	HolePunchAttemptEvtT = "HolePunchAttempt"
)
// Event Objects

// DirectDialEvt is the payload of a DirectDialEvtT event. It records the
// outcome of a direct dial.
type DirectDialEvt struct {
// Success is true when the event was emitted by DirectDialSuccessful.
Success bool
// EllapsedTime is the duration reported by the caller of the tracer.
EllapsedTime time.Duration
// Error is the failure's error string; it is omitted from JSON when empty.
Error string `json:",omitempty"`
}
// ProtocolErrorEvt is the payload of a ProtocolErrorEvtT event.
type ProtocolErrorEvt struct {
// Error is the error string of the error passed to tracer.ProtocolError.
Error string
}
// StartHolePunchEvt is the payload of a StartHolePunchEvtT event.
type StartHolePunchEvt struct {
// RemoteAddrs holds the string form of each multiaddr passed to
// tracer.StartHolePunch.
RemoteAddrs []string
// RTT is the round-trip time passed to tracer.StartHolePunch.
RTT time.Duration
}
// EndHolePunchEvt is the payload of an EndHolePunchEvtT event.
type EndHolePunchEvt struct {
// Success is true when tracer.EndHolePunch was called with a nil error.
Success bool
// EllapsedTime is the duration passed to tracer.EndHolePunch.
EllapsedTime time.Duration
// Error is the error string on failure; it is omitted from JSON when empty.
Error string `json:",omitempty"`
}
// HolePunchAttemptEvt is the payload of a HolePunchAttemptEvtT event.
type HolePunchAttemptEvt struct {
// Attempt is the tracer's running attempt count for the remote peer. The
// count restarts from 1 after the peer's entry has been garbage collected.
Attempt int
}
// tracer interface

// DirectDialSuccessful records a successful direct dial to p that took dt.
// It emits a DirectDialEvtT event when an EventTracer is set and reports
// DirectDialFinished(true) when a MetricsTracer is set. A nil tracer is a
// no-op.
func (t *tracer) DirectDialSuccessful(p peer.ID, dt time.Duration) {
	if t == nil {
		return
	}

	if events := t.et; events != nil {
		payload := &DirectDialEvt{Success: true, EllapsedTime: dt}
		events.Trace(&Event{
			Timestamp: time.Now().UnixNano(),
			Peer:      t.self,
			Remote:    p,
			Type:      DirectDialEvtT,
			Evt:       payload,
		})
	}

	if metrics := t.mt; metrics != nil {
		metrics.DirectDialFinished(true)
	}
}
// DirectDialFailed records a failed direct dial to p that took dt.
// It emits a DirectDialEvtT event with Success=false when an EventTracer is
// set and reports DirectDialFinished(false) when a MetricsTracer is set.
// A nil tracer is a no-op.
//
// err is expected to be non-nil; if it is nil the event is still emitted with
// an empty Error field instead of panicking inside tracing code.
func (t *tracer) DirectDialFailed(p peer.ID, dt time.Duration, err error) {
	if t == nil {
		return
	}
	if t.et != nil {
		evt := &DirectDialEvt{
			Success:      false,
			EllapsedTime: dt,
		}
		// Guard the method call: invoking Error on a nil error panics.
		if err != nil {
			evt.Error = err.Error()
		}
		t.et.Trace(&Event{
			Timestamp: time.Now().UnixNano(),
			Peer:      t.self,
			Remote:    p,
			Type:      DirectDialEvtT,
			Evt:       evt,
		})
	}
	if t.mt != nil {
		t.mt.DirectDialFinished(false)
	}
}
// ProtocolError emits a ProtocolErrorEvtT event for remote peer p carrying
// err's message. It is a no-op when the tracer is nil or has no EventTracer.
//
// err is expected to be non-nil; if it is nil the event is emitted with an
// empty Error field instead of panicking inside tracing code.
func (t *tracer) ProtocolError(p peer.ID, err error) {
	if t == nil || t.et == nil {
		return
	}
	evt := &ProtocolErrorEvt{}
	// Guard the method call: invoking Error on a nil error panics.
	if err != nil {
		evt.Error = err.Error()
	}
	t.et.Trace(&Event{
		Timestamp: time.Now().UnixNano(),
		Peer:      t.self,
		Remote:    p,
		Type:      ProtocolErrorEvtT,
		Evt:       evt,
	})
}
// StartHolePunch emits a StartHolePunchEvtT event for remote peer p that
// carries the string form of obsAddrs and the given rtt. It is a no-op when
// the tracer is nil or has no EventTracer.
func (t *tracer) StartHolePunch(p peer.ID, obsAddrs []ma.Multiaddr, rtt time.Duration) {
	if t == nil || t.et == nil {
		return
	}

	// Events carry addresses as strings rather than Multiaddr values.
	remote := make([]string, len(obsAddrs))
	for i, addr := range obsAddrs {
		remote[i] = addr.String()
	}

	payload := &StartHolePunchEvt{RemoteAddrs: remote, RTT: rtt}
	t.et.Trace(&Event{
		Timestamp: time.Now().UnixNano(),
		Peer:      t.self,
		Remote:    p,
		Type:      StartHolePunchEvtT,
		Evt:       payload,
	})
}
// EndHolePunch emits an EndHolePunchEvtT event for remote peer p. The event
// is marked successful when err is nil; otherwise it carries err's message.
// It is a no-op when the tracer is nil or has no EventTracer.
func (t *tracer) EndHolePunch(p peer.ID, dt time.Duration, err error) {
	if t == nil || t.et == nil {
		return
	}

	var errMsg string
	if err != nil {
		errMsg = err.Error()
	}

	t.et.Trace(&Event{
		Timestamp: time.Now().UnixNano(),
		Peer:      t.self,
		Remote:    p,
		Type:      EndHolePunchEvtT,
		Evt: &EndHolePunchEvt{
			Success:      err == nil,
			EllapsedTime: dt,
			Error:        errMsg,
		},
	})
}
// HolePunchFinished passes the outcome of a hole punch straight through to
// the MetricsTracer. It is a no-op when the tracer is nil or has no
// MetricsTracer; no event is emitted to the EventTracer.
func (t *tracer) HolePunchFinished(side string, numAttempts int, theirAddrs []ma.Multiaddr, ourAddrs []ma.Multiaddr, directConn network.Conn) {
	if t == nil || t.mt == nil {
		return
	}
	t.mt.HolePunchFinished(side, numAttempts, theirAddrs, ourAddrs, directConn)
}
// HolePunchAttempt bumps the attempt counter kept for remote peer p, stamps
// the entry with the current time and emits a HolePunchAttemptEvtT event
// carrying the new count. It is a no-op when the tracer is nil or has no
// EventTracer.
func (t *tracer) HolePunchAttempt(p peer.ID) {
	if t == nil || t.et == nil {
		return
	}

	ts := time.Now()

	// Map values are not addressable, so update a copy and store it back.
	t.mutex.Lock()
	rec := t.peers[p]
	rec.counter++
	rec.last = ts
	t.peers[p] = rec
	t.mutex.Unlock()

	// Trace outside the critical section; rec is a private copy.
	t.et.Trace(&Event{
		Timestamp: ts.UnixNano(),
		Peer:      t.self,
		Remote:    p,
		Type:      HolePunchAttemptEvtT,
		Evt:       &HolePunchAttemptEvt{Attempt: rec.counter},
	})
}
// gc runs until t.ctx is cancelled, waking every tracerGCInterval to delete
// peers entries whose last attempt is older than tracerCacheDuration. Start
// launches it only when the tracer has a non-nil EventTracer. It signals
// t.refCount on exit so Close can wait for it.
func (t *tracer) gc() {
	defer t.refCount.Done()

	ticker := time.NewTicker(tracerGCInterval)
	defer ticker.Stop()

	for {
		select {
		case <-t.ctx.Done():
			return
		case <-ticker.C:
			// Anything last touched before this instant is stale.
			cutoff := time.Now().Add(-tracerCacheDuration)

			t.mutex.Lock()
			for id, rec := range t.peers {
				if rec.last.Before(cutoff) {
					delete(t.peers, id)
				}
			}
			t.mutex.Unlock()
		}
	}
}
// Start creates the tracer's cancellable context and launches the gc
// goroutine. It is a no-op when the tracer is nil or has no EventTracer,
// because the peers map is only populated when events are traced.
func (t *tracer) Start() {
	if t == nil || t.et == nil {
		return
	}

	ctx, cancel := context.WithCancel(context.Background())
	t.ctx, t.ctxCancel = ctx, cancel

	// Register the goroutine before starting it so Close can wait on it.
	t.refCount.Add(1)
	go t.gc()
}
// Close stops the gc goroutine launched by Start and waits for it to exit.
// It is a no-op when the tracer is nil, has no EventTracer, or was never
// started. It always returns nil.
func (t *tracer) Close() error {
	if t == nil || t.et == nil {
		return nil
	}
	// ctxCancel is only assigned by Start; calling a nil func value panics,
	// so tolerate Close without a preceding Start.
	if t.ctxCancel != nil {
		t.ctxCancel()
	}
	// Returns immediately when no gc goroutine was registered.
	t.refCount.Wait()
	return nil
}

View File

@@ -0,0 +1,79 @@
package holepunch
import (
"context"
"github.com/libp2p/go-libp2p/core/host"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/peer"
ma "github.com/multiformats/go-multiaddr"
manet "github.com/multiformats/go-multiaddr/net"
)
// containsPublicAddr reports whether addrs holds at least one address that is
// not a relay address and that manet.IsPublicAddr accepts.
func containsPublicAddr(addrs []ma.Multiaddr) bool {
	for i := range addrs {
		// Relay addresses are rejected before the public check is consulted.
		if !isRelayAddress(addrs[i]) && manet.IsPublicAddr(addrs[i]) {
			return true
		}
	}
	return false
}
// removeRelayAddrs returns a new slice holding the elements of addrs that are
// not relay addresses, in their original order. The input is not modified and
// the result is never nil.
func removeRelayAddrs(addrs []ma.Multiaddr) []ma.Multiaddr {
	kept := make([]ma.Multiaddr, 0, len(addrs))
	for i := range addrs {
		if isRelayAddress(addrs[i]) {
			continue
		}
		kept = append(kept, addrs[i])
	}
	return kept
}
// isRelayAddress reports whether looking up the P_CIRCUIT protocol in a
// succeeds, i.e. whether a is a relayed (circuit) address.
func isRelayAddress(a ma.Multiaddr) bool {
	if _, err := a.ValueForProtocol(ma.P_CIRCUIT); err != nil {
		return false
	}
	return true
}
// addrsToBytes returns the byte encoding (Multiaddr.Bytes) of every address
// in as, preserving order. The result is never nil.
func addrsToBytes(as []ma.Multiaddr) [][]byte {
	encoded := make([][]byte, len(as))
	for i, addr := range as {
		encoded[i] = addr.Bytes()
	}
	return encoded
}
// addrsFromBytes decodes each element of bzs with ma.NewMultiaddrBytes and
// returns the addresses that decoded successfully, preserving order. Entries
// that fail to decode are dropped without reporting an error. The result is
// never nil.
func addrsFromBytes(bzs [][]byte) []ma.Multiaddr {
	decoded := make([]ma.Multiaddr, 0, len(bzs))
	for i := range bzs {
		addr, err := ma.NewMultiaddrBytes(bzs[i])
		if err != nil {
			// Best effort: skip entries that cannot be parsed.
			continue
		}
		decoded = append(decoded, addr)
	}
	return decoded
}
// getDirectConnection returns the first connection h has to peer p whose
// remote multiaddr is not a relay address, or nil when there is none.
func getDirectConnection(h host.Host, p peer.ID) network.Conn {
	conns := h.Network().ConnsToPeer(p)
	for i := range conns {
		if isRelayAddress(conns[i].RemoteMultiaddr()) {
			continue
		}
		return conns[i]
	}
	return nil
}
// holePunchConnect dials pi through host using a context marked for
// simultaneous connect (with isClient selecting the role) and for a forced
// direct dial, bounded by dialTimeout. It returns the error from host.Connect
// and logs the outcome at debug level.
func holePunchConnect(ctx context.Context, host host.Host, pi peer.AddrInfo, isClient bool) error {
	// Layer both dial hints onto the caller's context before adding the
	// timeout.
	hintedCtx := network.WithForceDirectDial(
		network.WithSimultaneousConnect(ctx, isClient, "hole-punching"),
		"hole-punching",
	)
	dialCtx, cancel := context.WithTimeout(hintedCtx, dialTimeout)
	defer cancel()

	err := host.Connect(dialCtx, pi)
	if err == nil {
		log.Debugw("hole punch successful", "peer", pi.ID)
		return nil
	}
	log.Debugw("hole punch attempt with peer failed", "peer ID", pi.ID, "error", err)
	return err
}