Integrate BACKBEAT SDK and resolve KACHING license validation

Major integrations and fixes:
- Added BACKBEAT SDK integration for P2P operation timing
- Implemented beat-aware status tracking for distributed operations
- Added Docker secrets support for secure license management
- Resolved KACHING license validation via HTTPS/TLS
- Updated docker-compose configuration for clean stack deployment
- Disabled rollback policies to prevent deployment failures
- Added license credential storage (CHORUS-DEV-MULTI-001)

Technical improvements:
- BACKBEAT P2P operation tracking with phase management
- Enhanced configuration system with file-based secrets (see the sketch below)
- Improved error handling for license validation
- Clean separation of KACHING and CHORUS deployment stacks
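
A minimal sketch of the file-based secret loading described above. The names (LICENSE_KEY, the _FILE convention, /run/secrets paths) are illustrative only, not the actual CHORUS configuration keys:

package config

import (
	"os"
	"strings"
)

// loadSecret returns a credential, preferring a Docker secret file
// (conventionally pointed at by <NAME>_FILE, e.g. /run/secrets/license_key)
// over a plain environment variable. Names and paths are illustrative.
func loadSecret(name string) (string, error) {
	if path := os.Getenv(name + "_FILE"); path != "" {
		b, err := os.ReadFile(path)
		if err != nil {
			return "", err
		}
		return strings.TrimSpace(string(b)), nil
	}
	return os.Getenv(name), nil
}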

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
anthonyrawlins
2025-09-06 07:56:26 +10:00
parent 543ab216f9
commit 9bdcbe0447
4730 changed files with 1480093 additions and 1916 deletions


@@ -0,0 +1,165 @@
package autorelay
import (
"encoding/binary"
ma "github.com/multiformats/go-multiaddr"
manet "github.com/multiformats/go-multiaddr/net"
)
// This function cleans up a relay's address set to remove private addresses and curtail
// addrsplosion.
func cleanupAddressSet(addrs []ma.Multiaddr) []ma.Multiaddr {
var public, private []ma.Multiaddr
for _, a := range addrs {
if isRelayAddr(a) {
continue
}
if manet.IsPublicAddr(a) || isDNSAddr(a) {
public = append(public, a)
continue
}
// discard unroutable addrs
if manet.IsPrivateAddr(a) {
private = append(private, a)
}
}
if !hasAddrsplosion(public) {
return public
}
return sanitizeAddrsplodedSet(public, private)
}
func isRelayAddr(a ma.Multiaddr) bool {
isRelay := false
ma.ForEach(a, func(c ma.Component) bool {
switch c.Protocol().Code {
case ma.P_CIRCUIT:
isRelay = true
return false
default:
return true
}
})
return isRelay
}
func isDNSAddr(a ma.Multiaddr) bool {
if first, _ := ma.SplitFirst(a); first != nil {
switch first.Protocol().Code {
case ma.P_DNS4, ma.P_DNS6, ma.P_DNSADDR:
return true
}
}
return false
}
// we have addrsplosion if for some protocol we advertise multiple ports on
// the same base address.
func hasAddrsplosion(addrs []ma.Multiaddr) bool {
aset := make(map[string]int)
for _, a := range addrs {
key, port := addrKeyAndPort(a)
xport, ok := aset[key]
if ok && port != xport {
return true
}
aset[key] = port
}
return false
}
func addrKeyAndPort(a ma.Multiaddr) (string, int) {
var (
key string
port int
)
ma.ForEach(a, func(c ma.Component) bool {
switch c.Protocol().Code {
case ma.P_TCP, ma.P_UDP:
port = int(binary.BigEndian.Uint16(c.RawValue()))
key += "/" + c.Protocol().Name
default:
val := c.Value()
if val == "" {
val = c.Protocol().Name
}
key += "/" + val
}
return true
})
return key, port
}
// clean up addrsplosion
// the following heuristic is used:
// - for each base address/protocol combination, if there are multiple ports advertised then
// only accept the default port if present.
// - If the default port is not present, we check for non-standard ports by tracking
// private port bindings if present.
// - If there is no default or private port binding, then we can't infer the correct
// port and give up and return all addrs (for that base address)
func sanitizeAddrsplodedSet(public, private []ma.Multiaddr) []ma.Multiaddr {
type portAndAddr struct {
addr ma.Multiaddr
port int
}
privports := make(map[int]struct{})
pubaddrs := make(map[string][]portAndAddr)
for _, a := range private {
_, port := addrKeyAndPort(a)
privports[port] = struct{}{}
}
for _, a := range public {
key, port := addrKeyAndPort(a)
pubaddrs[key] = append(pubaddrs[key], portAndAddr{addr: a, port: port})
}
var result []ma.Multiaddr
for _, pas := range pubaddrs {
if len(pas) == 1 {
// it's not addrsploded
result = append(result, pas[0].addr)
continue
}
haveAddr := false
for _, pa := range pas {
if _, ok := privports[pa.port]; ok {
// it matches a privately bound port, use it
result = append(result, pa.addr)
haveAddr = true
continue
}
if pa.port == 4001 || pa.port == 4002 {
// it's a default port, use it
result = append(result, pa.addr)
haveAddr = true
}
}
if !haveAddr {
// we weren't able to select a port; bite the bullet and use them all
for _, pa := range pas {
result = append(result, pa.addr)
}
}
}
return result
}
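
To make the heuristic above concrete, here is a sketch of exercising it from inside the package (e.g. in a test file); the addresses are made up, with 4001 as a default port the sanitizer prefers and a private binding hinting at the non-standard port:

package autorelay

import (
	"testing"

	ma "github.com/multiformats/go-multiaddr"
)

func TestCleanupAddressSetSketch(t *testing.T) {
	addrs := []ma.Multiaddr{
		// Two ports advertised on the same public base address: addrsplosion.
		ma.StringCast("/ip4/1.2.3.4/tcp/4001"),
		ma.StringCast("/ip4/1.2.3.4/tcp/43123"),
		// A private address whose port matches the non-default public one.
		ma.StringCast("/ip4/192.168.1.10/tcp/43123"),
	}
	cleaned := cleanupAddressSet(addrs)
	// Both public addresses survive (default port + privately bound port);
	// the private address itself is dropped.
	if len(cleaned) != 2 {
		t.Fatalf("expected 2 addrs, got %d: %v", len(cleaned), cleaned)
	}
}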


@@ -0,0 +1,125 @@
package autorelay
import (
"context"
"errors"
"sync"
"github.com/libp2p/go-libp2p/core/event"
"github.com/libp2p/go-libp2p/core/host"
"github.com/libp2p/go-libp2p/core/network"
basic "github.com/libp2p/go-libp2p/p2p/host/basic"
"github.com/libp2p/go-libp2p/p2p/host/eventbus"
logging "github.com/ipfs/go-log/v2"
ma "github.com/multiformats/go-multiaddr"
)
var log = logging.Logger("autorelay")
type AutoRelay struct {
refCount sync.WaitGroup
ctx context.Context
ctxCancel context.CancelFunc
conf *config
mx sync.Mutex
status network.Reachability
relayFinder *relayFinder
host host.Host
addrsF basic.AddrsFactory
metricsTracer MetricsTracer
}
func NewAutoRelay(bhost *basic.BasicHost, opts ...Option) (*AutoRelay, error) {
r := &AutoRelay{
host: bhost,
addrsF: bhost.AddrsFactory,
status: network.ReachabilityUnknown,
}
conf := defaultConfig
for _, opt := range opts {
if err := opt(&conf); err != nil {
return nil, err
}
}
r.ctx, r.ctxCancel = context.WithCancel(context.Background())
r.conf = &conf
r.relayFinder = newRelayFinder(bhost, conf.peerSource, &conf)
r.metricsTracer = &wrappedMetricsTracer{conf.metricsTracer}
bhost.AddrsFactory = r.hostAddrs
return r, nil
}
func (r *AutoRelay) Start() {
r.refCount.Add(1)
go func() {
defer r.refCount.Done()
r.background()
}()
}
func (r *AutoRelay) background() {
subReachability, err := r.host.EventBus().Subscribe(new(event.EvtLocalReachabilityChanged), eventbus.Name("autorelay (background)"))
if err != nil {
log.Debug("failed to subscribe to the EvtLocalReachabilityChanged")
return
}
defer subReachability.Close()
for {
select {
case <-r.ctx.Done():
return
case ev, ok := <-subReachability.Out():
if !ok {
return
}
// TODO: push changed addresses
evt := ev.(event.EvtLocalReachabilityChanged)
switch evt.Reachability {
case network.ReachabilityPrivate, network.ReachabilityUnknown:
err := r.relayFinder.Start()
if errors.Is(err, errAlreadyRunning) {
log.Debug("tried to start already running relay finder")
} else if err != nil {
log.Errorw("failed to start relay finder", "error", err)
} else {
r.metricsTracer.RelayFinderStatus(true)
}
case network.ReachabilityPublic:
r.relayFinder.Stop()
r.metricsTracer.RelayFinderStatus(false)
}
r.mx.Lock()
r.status = evt.Reachability
r.mx.Unlock()
}
}
}
func (r *AutoRelay) hostAddrs(addrs []ma.Multiaddr) []ma.Multiaddr {
return r.relayAddrs(r.addrsF(addrs))
}
func (r *AutoRelay) relayAddrs(addrs []ma.Multiaddr) []ma.Multiaddr {
r.mx.Lock()
defer r.mx.Unlock()
if r.status != network.ReachabilityPrivate {
return addrs
}
return r.relayFinder.relayAddrs(addrs)
}
func (r *AutoRelay) Close() error {
r.ctxCancel()
err := r.relayFinder.Stop()
r.refCount.Wait()
return err
}
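
For context, applications normally don't construct AutoRelay directly; it is wired in through go-libp2p constructor options. A minimal sketch, assuming the standard libp2p.EnableAutoRelayWithStaticRelays option and relay multiaddrs supplied on the command line:

package main

import (
	"fmt"
	"log"
	"os"

	"github.com/libp2p/go-libp2p"
	"github.com/libp2p/go-libp2p/core/peer"
)

func main() {
	// Expect one or more full relay multiaddrs (including /p2p/<peer-id>) as arguments.
	var relays []peer.AddrInfo
	for _, s := range os.Args[1:] {
		ai, err := peer.AddrInfoFromString(s)
		if err != nil {
			log.Fatalf("bad relay address %q: %v", s, err)
		}
		relays = append(relays, *ai)
	}

	// EnableAutoRelayWithStaticRelays installs the AutoRelay defined above,
	// using the given peers as its only relay candidates.
	h, err := libp2p.New(libp2p.EnableAutoRelayWithStaticRelays(relays))
	if err != nil {
		log.Fatal(err)
	}
	defer h.Close()

	fmt.Println("host ID:", h.ID())
	fmt.Println("advertised addrs:", h.Addrs())
}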


@@ -0,0 +1,23 @@
package autorelay
import (
"github.com/libp2p/go-libp2p/core/host"
)
type AutoRelayHost struct {
host.Host
ar *AutoRelay
}
func (h *AutoRelayHost) Close() error {
_ = h.ar.Close()
return h.Host.Close()
}
func (h *AutoRelayHost) Start() {
h.ar.Start()
}
func NewAutoRelayHost(h host.Host, ar *AutoRelay) *AutoRelayHost {
return &AutoRelayHost{Host: h, ar: ar}
}


@@ -0,0 +1,373 @@
package autorelay
import (
"errors"
"github.com/libp2p/go-libp2p/p2p/metricshelper"
"github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/client"
pbv2 "github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/pb"
"github.com/prometheus/client_golang/prometheus"
)
const metricNamespace = "libp2p_autorelay"
var (
status = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: "status",
Help: "relay finder active",
})
reservationsOpenedTotal = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: "reservations_opened_total",
Help: "Reservations Opened",
},
)
reservationsClosedTotal = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: "reservations_closed_total",
Help: "Reservations Closed",
},
)
reservationRequestsOutcomeTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: "reservation_requests_outcome_total",
Help: "Reservation Request Outcome",
},
[]string{"request_type", "outcome"},
)
relayAddressesUpdatedTotal = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: "relay_addresses_updated_total",
Help: "Relay Addresses Updated Count",
},
)
relayAddressesCount = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: "relay_addresses_count",
Help: "Relay Addresses Count",
},
)
candidatesCircuitV2SupportTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: "candidates_circuit_v2_support_total",
Help: "Candidiates supporting circuit v2",
},
[]string{"support"},
)
candidatesTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: "candidates_total",
Help: "Candidates Total",
},
[]string{"type"},
)
candLoopState = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: "candidate_loop_state",
Help: "Candidate Loop State",
},
)
scheduledWorkTime = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: "scheduled_work_time",
Help: "Scheduled Work Times",
},
[]string{"work_type"},
)
desiredReservations = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: "desired_reservations",
Help: "Desired Reservations",
},
)
collectors = []prometheus.Collector{
status,
reservationsOpenedTotal,
reservationsClosedTotal,
reservationRequestsOutcomeTotal,
relayAddressesUpdatedTotal,
relayAddressesCount,
candidatesCircuitV2SupportTotal,
candidatesTotal,
candLoopState,
scheduledWorkTime,
desiredReservations,
}
)
type candidateLoopState int
const (
peerSourceRateLimited candidateLoopState = iota
waitingOnPeerChan
waitingForTrigger
stopped
)
// MetricsTracer is the interface for tracking metrics for autorelay
type MetricsTracer interface {
RelayFinderStatus(isActive bool)
ReservationEnded(cnt int)
ReservationOpened(cnt int)
ReservationRequestFinished(isRefresh bool, err error)
RelayAddressCount(int)
RelayAddressUpdated()
CandidateChecked(supportsCircuitV2 bool)
CandidateAdded(cnt int)
CandidateRemoved(cnt int)
CandidateLoopState(state candidateLoopState)
ScheduledWorkUpdated(scheduledWork *scheduledWorkTimes)
DesiredReservations(int)
}
type metricsTracer struct{}
var _ MetricsTracer = &metricsTracer{}
type metricsTracerSetting struct {
reg prometheus.Registerer
}
type MetricsTracerOption func(*metricsTracerSetting)
func WithRegisterer(reg prometheus.Registerer) MetricsTracerOption {
return func(s *metricsTracerSetting) {
if reg != nil {
s.reg = reg
}
}
}
func NewMetricsTracer(opts ...MetricsTracerOption) MetricsTracer {
setting := &metricsTracerSetting{reg: prometheus.DefaultRegisterer}
for _, opt := range opts {
opt(setting)
}
metricshelper.RegisterCollectors(setting.reg, collectors...)
// Initialise these counters to 0 otherwise the first reservation requests aren't handled
// correctly when using the promql increase function
reservationRequestsOutcomeTotal.WithLabelValues("refresh", "success")
reservationRequestsOutcomeTotal.WithLabelValues("new", "success")
candidatesCircuitV2SupportTotal.WithLabelValues("yes")
candidatesCircuitV2SupportTotal.WithLabelValues("no")
return &metricsTracer{}
}
func (mt *metricsTracer) RelayFinderStatus(isActive bool) {
if isActive {
status.Set(1)
} else {
status.Set(0)
}
}
func (mt *metricsTracer) ReservationEnded(cnt int) {
reservationsClosedTotal.Add(float64(cnt))
}
func (mt *metricsTracer) ReservationOpened(cnt int) {
reservationsOpenedTotal.Add(float64(cnt))
}
func (mt *metricsTracer) ReservationRequestFinished(isRefresh bool, err error) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
if isRefresh {
*tags = append(*tags, "refresh")
} else {
*tags = append(*tags, "new")
}
*tags = append(*tags, getReservationRequestStatus(err))
reservationRequestsOutcomeTotal.WithLabelValues(*tags...).Inc()
if !isRefresh && err == nil {
reservationsOpenedTotal.Inc()
}
}
func (mt *metricsTracer) RelayAddressUpdated() {
relayAddressesUpdatedTotal.Inc()
}
func (mt *metricsTracer) RelayAddressCount(cnt int) {
relayAddressesCount.Set(float64(cnt))
}
func (mt *metricsTracer) CandidateChecked(supportsCircuitV2 bool) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
if supportsCircuitV2 {
*tags = append(*tags, "yes")
} else {
*tags = append(*tags, "no")
}
candidatesCircuitV2SupportTotal.WithLabelValues(*tags...).Inc()
}
func (mt *metricsTracer) CandidateAdded(cnt int) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
*tags = append(*tags, "added")
candidatesTotal.WithLabelValues(*tags...).Add(float64(cnt))
}
func (mt *metricsTracer) CandidateRemoved(cnt int) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
*tags = append(*tags, "removed")
candidatesTotal.WithLabelValues(*tags...).Add(float64(cnt))
}
func (mt *metricsTracer) CandidateLoopState(state candidateLoopState) {
candLoopState.Set(float64(state))
}
func (mt *metricsTracer) ScheduledWorkUpdated(scheduledWork *scheduledWorkTimes) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
*tags = append(*tags, "allowed peer source call")
scheduledWorkTime.WithLabelValues(*tags...).Set(float64(scheduledWork.nextAllowedCallToPeerSource.Unix()))
*tags = (*tags)[:0]
*tags = append(*tags, "reservation refresh")
scheduledWorkTime.WithLabelValues(*tags...).Set(float64(scheduledWork.nextRefresh.Unix()))
*tags = (*tags)[:0]
*tags = append(*tags, "clear backoff")
scheduledWorkTime.WithLabelValues(*tags...).Set(float64(scheduledWork.nextBackoff.Unix()))
*tags = (*tags)[:0]
*tags = append(*tags, "old candidate check")
scheduledWorkTime.WithLabelValues(*tags...).Set(float64(scheduledWork.nextOldCandidateCheck.Unix()))
}
func (mt *metricsTracer) DesiredReservations(cnt int) {
desiredReservations.Set(float64(cnt))
}
func getReservationRequestStatus(err error) string {
if err == nil {
return "success"
}
status := "err other"
var re client.ReservationError
if errors.As(err, &re) {
switch re.Status {
case pbv2.Status_CONNECTION_FAILED:
return "connection failed"
case pbv2.Status_MALFORMED_MESSAGE:
return "malformed message"
case pbv2.Status_RESERVATION_REFUSED:
return "reservation refused"
case pbv2.Status_PERMISSION_DENIED:
return "permission denied"
case pbv2.Status_RESOURCE_LIMIT_EXCEEDED:
return "resource limit exceeded"
}
}
return status
}
// wrappedMetricsTracer wraps MetricsTracer and ignores all calls when mt is nil
type wrappedMetricsTracer struct {
mt MetricsTracer
}
var _ MetricsTracer = &wrappedMetricsTracer{}
func (mt *wrappedMetricsTracer) RelayFinderStatus(isActive bool) {
if mt.mt != nil {
mt.mt.RelayFinderStatus(isActive)
}
}
func (mt *wrappedMetricsTracer) ReservationEnded(cnt int) {
if mt.mt != nil {
mt.mt.ReservationEnded(cnt)
}
}
func (mt *wrappedMetricsTracer) ReservationOpened(cnt int) {
if mt.mt != nil {
mt.mt.ReservationOpened(cnt)
}
}
func (mt *wrappedMetricsTracer) ReservationRequestFinished(isRefresh bool, err error) {
if mt.mt != nil {
mt.mt.ReservationRequestFinished(isRefresh, err)
}
}
func (mt *wrappedMetricsTracer) RelayAddressUpdated() {
if mt.mt != nil {
mt.mt.RelayAddressUpdated()
}
}
func (mt *wrappedMetricsTracer) RelayAddressCount(cnt int) {
if mt.mt != nil {
mt.mt.RelayAddressCount(cnt)
}
}
func (mt *wrappedMetricsTracer) CandidateChecked(supportsCircuitV2 bool) {
if mt.mt != nil {
mt.mt.CandidateChecked(supportsCircuitV2)
}
}
func (mt *wrappedMetricsTracer) CandidateAdded(cnt int) {
if mt.mt != nil {
mt.mt.CandidateAdded(cnt)
}
}
func (mt *wrappedMetricsTracer) CandidateRemoved(cnt int) {
if mt.mt != nil {
mt.mt.CandidateRemoved(cnt)
}
}
func (mt *wrappedMetricsTracer) ScheduledWorkUpdated(scheduledWork *scheduledWorkTimes) {
if mt.mt != nil {
mt.mt.ScheduledWorkUpdated(scheduledWork)
}
}
func (mt *wrappedMetricsTracer) DesiredReservations(cnt int) {
if mt.mt != nil {
mt.mt.DesiredReservations(cnt)
}
}
func (mt *wrappedMetricsTracer) CandidateLoopState(state candidateLoopState) {
if mt.mt != nil {
mt.mt.CandidateLoopState(state)
}
}
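
A minimal sketch of hooking the tracer above into an application with a dedicated Prometheus registry. The autorelay option names come from the files in this commit; libp2p.EnableAutoRelayWithPeerSource and the promhttp handler are assumed from the usual go-libp2p and Prometheus client APIs:

package main

import (
	"context"
	"log"
	"net/http"

	"github.com/libp2p/go-libp2p"
	"github.com/libp2p/go-libp2p/core/peer"
	"github.com/libp2p/go-libp2p/p2p/host/autorelay"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
	// Register the autorelay collectors on a private registry instead of the default one.
	reg := prometheus.NewRegistry()
	mt := autorelay.NewMetricsTracer(autorelay.WithRegisterer(reg))

	// An empty peer source, just to keep the sketch self-contained.
	noRelays := func(ctx context.Context, num int) <-chan peer.AddrInfo {
		ch := make(chan peer.AddrInfo)
		close(ch)
		return ch
	}

	h, err := libp2p.New(
		libp2p.EnableAutoRelayWithPeerSource(noRelays, autorelay.WithMetricsTracer(mt)),
	)
	if err != nil {
		log.Fatal(err)
	}
	defer h.Close()

	// Expose the registry, e.g. for Prometheus to scrape.
	http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))
	log.Fatal(http.ListenAndServe(":2112", nil))
}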


@@ -0,0 +1,233 @@
package autorelay
import (
"context"
"errors"
"time"
"github.com/libp2p/go-libp2p/core/peer"
)
// AutoRelay will call this function when it needs new candidates because it is
// not connected to the desired number of relays or we get disconnected from one
// of the relays. Implementations must send *at most* numPeers, and close the
// channel when they don't intend to provide any more peers. AutoRelay will not
// call the callback again until the channel is closed. Implementations should
// send new peers, but may send peers they sent before. AutoRelay implements a
// per-peer backoff (see WithBackoff). See WithMinInterval for setting the
// minimum interval between calls to the callback. The context.Context passed
may be canceled when AutoRelay feels satisfied; it will be canceled when the
node is shutting down. If the context is canceled, you MUST close the output
// channel at some point.
type PeerSource func(ctx context.Context, num int) <-chan peer.AddrInfo
type config struct {
clock ClockWithInstantTimer
peerSource PeerSource
// minimum interval used to call the peerSource callback
minInterval time.Duration
// see WithMinCandidates
minCandidates int
// see WithMaxCandidates
maxCandidates int
// Delay until we obtain reservations with relays, if we have less than minCandidates candidates.
// See WithBootDelay.
bootDelay time.Duration
// backoff is the time we wait after failing to obtain a reservation with a candidate
backoff time.Duration
// Number of relays we strive to obtain a reservation with.
desiredRelays int
// see WithMaxCandidateAge
maxCandidateAge time.Duration
setMinCandidates bool
// see WithMetricsTracer
metricsTracer MetricsTracer
}
var defaultConfig = config{
clock: RealClock{},
minCandidates: 4,
maxCandidates: 20,
bootDelay: 3 * time.Minute,
backoff: time.Hour,
desiredRelays: 2,
maxCandidateAge: 30 * time.Minute,
minInterval: 30 * time.Second,
}
var (
errAlreadyHavePeerSource = errors.New("can only use a single WithPeerSource or WithStaticRelays")
)
type Option func(*config) error
func WithStaticRelays(static []peer.AddrInfo) Option {
return func(c *config) error {
if c.peerSource != nil {
return errAlreadyHavePeerSource
}
WithPeerSource(func(ctx context.Context, numPeers int) <-chan peer.AddrInfo {
if len(static) < numPeers {
numPeers = len(static)
}
c := make(chan peer.AddrInfo, numPeers)
defer close(c)
for i := 0; i < numPeers; i++ {
c <- static[i]
}
return c
})(c)
WithMinCandidates(len(static))(c)
WithMaxCandidates(len(static))(c)
WithNumRelays(len(static))(c)
return nil
}
}
// WithPeerSource defines a callback for AutoRelay to query for more relay candidates.
func WithPeerSource(f PeerSource) Option {
return func(c *config) error {
if c.peerSource != nil {
return errAlreadyHavePeerSource
}
c.peerSource = f
return nil
}
}
// WithNumRelays sets the number of relays we strive to obtain reservations with.
func WithNumRelays(n int) Option {
return func(c *config) error {
c.desiredRelays = n
return nil
}
}
// WithMaxCandidates sets the number of relay candidates that we buffer.
func WithMaxCandidates(n int) Option {
return func(c *config) error {
c.maxCandidates = n
if c.minCandidates > n {
c.minCandidates = n
}
return nil
}
}
// WithMinCandidates sets the minimum number of relay candidates we collect before we try to get a reservation
// with any of them (unless we've been running for longer than the boot delay).
// This is to make sure that we don't just randomly connect to the first candidate that we discover.
func WithMinCandidates(n int) Option {
return func(c *config) error {
if n > c.maxCandidates {
n = c.maxCandidates
}
c.minCandidates = n
c.setMinCandidates = true
return nil
}
}
// WithBootDelay sets the boot delay for finding relays.
// We won't attempt any reservation if we have fewer than the minimum number of candidates.
// This prevents us from connecting to the "first best" relay, and allows us to select relays more carefully.
// However, in case we haven't found enough relays after the boot delay, we use what we have.
func WithBootDelay(d time.Duration) Option {
return func(c *config) error {
c.bootDelay = d
return nil
}
}
// WithBackoff sets the time we wait after failing to obtain a reservation with a candidate.
func WithBackoff(d time.Duration) Option {
return func(c *config) error {
c.backoff = d
return nil
}
}
// WithMaxCandidateAge sets the maximum age of a candidate.
// When we are connected to the desired number of relays, we don't ask the peer source for new candidates.
// This can lead to AutoRelay's candidate list becoming outdated, and means we won't be able
// to quickly establish a new relay connection if our existing connection breaks, if all the candidates
// have become stale.
func WithMaxCandidateAge(d time.Duration) Option {
return func(c *config) error {
c.maxCandidateAge = d
return nil
}
}
// InstantTimer is a timer that triggers at some instant rather than some duration
type InstantTimer interface {
Reset(d time.Time) bool
Stop() bool
Ch() <-chan time.Time
}
// ClockWithInstantTimer is a clock that can create timers that trigger at some
// instant rather than some duration
type ClockWithInstantTimer interface {
Now() time.Time
Since(t time.Time) time.Duration
InstantTimer(when time.Time) InstantTimer
}
type RealTimer struct{ t *time.Timer }
var _ InstantTimer = (*RealTimer)(nil)
func (t RealTimer) Ch() <-chan time.Time {
return t.t.C
}
func (t RealTimer) Reset(d time.Time) bool {
return t.t.Reset(time.Until(d))
}
func (t RealTimer) Stop() bool {
return t.t.Stop()
}
type RealClock struct{}
var _ ClockWithInstantTimer = RealClock{}
func (RealClock) Now() time.Time {
return time.Now()
}
func (RealClock) Since(t time.Time) time.Duration {
return time.Since(t)
}
func (RealClock) InstantTimer(when time.Time) InstantTimer {
t := time.NewTimer(time.Until(when))
return &RealTimer{t}
}
func WithClock(cl ClockWithInstantTimer) Option {
return func(c *config) error {
c.clock = cl
return nil
}
}
// WithMinInterval sets the minimum interval between calls to the peerSource callback:
// even if AutoRelay needs new candidates, the callback will not be called more often than this.
func WithMinInterval(interval time.Duration) Option {
return func(c *config) error {
c.minInterval = interval
return nil
}
}
// WithMetricsTracer configures autorelay to use mt to track metrics
func WithMetricsTracer(mt MetricsTracer) Option {
return func(c *config) error {
c.metricsTracer = mt
return nil
}
}
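
The PeerSource contract documented at the top of this file is easy to get wrong, so here is a sketch of an implementation that respects it: it sends at most numPeers candidates, always closes the channel, and stops early on context cancellation. The static knownRelays slice is a stand-in for whatever discovery mechanism an application actually uses:

package example

import (
	"context"

	"github.com/libp2p/go-libp2p/core/peer"
)

// knownRelays is a stand-in for application-specific relay discovery.
var knownRelays []peer.AddrInfo

// relayPeerSource follows the PeerSource contract: it sends at most numPeers
// candidates, always closes the channel, and stops early if ctx is canceled.
func relayPeerSource(ctx context.Context, numPeers int) <-chan peer.AddrInfo {
	out := make(chan peer.AddrInfo)
	go func() {
		defer close(out) // MUST close, even if the context is canceled
		for i, ai := range knownRelays {
			if i >= numPeers {
				return
			}
			select {
			case out <- ai:
			case <-ctx.Done():
				return
			}
		}
	}()
	return out
}

Such a function can then be handed to AutoRelay via WithPeerSource (or the corresponding go-libp2p constructor option).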


@@ -0,0 +1,17 @@
package autorelay
import (
ma "github.com/multiformats/go-multiaddr"
)
// Filter filters out all relay addresses.
func Filter(addrs []ma.Multiaddr) []ma.Multiaddr {
raddrs := make([]ma.Multiaddr, 0, len(addrs))
for _, addr := range addrs {
if isRelayAddr(addr) {
continue
}
raddrs = append(raddrs, addr)
}
return raddrs
}
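
Filter is typically useful as an address factory, so a host never advertises relay addresses. A sketch, assuming the standard libp2p.AddrsFactory option:

package main

import (
	"log"

	"github.com/libp2p/go-libp2p"
	"github.com/libp2p/go-libp2p/p2p/host/autorelay"
)

func main() {
	// Advertise only non-relay addresses from this host.
	h, err := libp2p.New(libp2p.AddrsFactory(autorelay.Filter))
	if err != nil {
		log.Fatal(err)
	}
	defer h.Close()
	log.Println("advertised addrs:", h.Addrs())
}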


@@ -0,0 +1,810 @@
package autorelay
import (
"context"
"errors"
"fmt"
"math/rand"
"sync"
"time"
"golang.org/x/sync/errgroup"
"github.com/libp2p/go-libp2p/core/event"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/peer"
basic "github.com/libp2p/go-libp2p/p2p/host/basic"
"github.com/libp2p/go-libp2p/p2p/host/eventbus"
circuitv2 "github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/client"
circuitv2_proto "github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/proto"
ma "github.com/multiformats/go-multiaddr"
manet "github.com/multiformats/go-multiaddr/net"
)
const protoIDv2 = circuitv2_proto.ProtoIDv2Hop
// Terminology:
// Candidate: Once we connect to a node and it supports relay protocol,
// we call it a candidate, and consider using it as a relay.
// Relay: Out of the list of candidates, we select a relay to connect to.
// Currently, we just randomly select a candidate, but we can employ more sophisticated
// selection strategies here (e.g. by factoring in the RTT).
const (
rsvpRefreshInterval = time.Minute
rsvpExpirationSlack = 2 * time.Minute
autorelayTag = "autorelay"
)
type candidate struct {
added time.Time
supportsRelayV2 bool
ai peer.AddrInfo
}
// relayFinder discovers relay candidates and maintains relay reservations for a host behind a NAT.
type relayFinder struct {
bootTime time.Time
host *basic.BasicHost
conf *config
refCount sync.WaitGroup
ctxCancel context.CancelFunc
ctxCancelMx sync.Mutex
peerSource PeerSource
candidateFound chan struct{} // receives every time we find a new relay candidate
candidateMx sync.Mutex
candidates map[peer.ID]*candidate
backoff map[peer.ID]time.Time
maybeConnectToRelayTrigger chan struct{} // cap: 1
// Any time _something_ happens that might cause us to need new candidates.
// This could be
// * the disconnection of a relay
// * the failed attempt to obtain a reservation with a current candidate
// * a candidate is deleted due to its age
maybeRequestNewCandidates chan struct{} // cap: 1.
relayUpdated chan struct{}
relayMx sync.Mutex
relays map[peer.ID]*circuitv2.Reservation
cachedAddrs []ma.Multiaddr
cachedAddrsExpiry time.Time
// A channel that triggers a run of `runScheduledWork`.
triggerRunScheduledWork chan struct{}
metricsTracer MetricsTracer
}
var errAlreadyRunning = errors.New("relayFinder already running")
func newRelayFinder(host *basic.BasicHost, peerSource PeerSource, conf *config) *relayFinder {
if peerSource == nil {
panic("Can not create a new relayFinder. Need a Peer Source fn or a list of static relays. Refer to the documentation around `libp2p.EnableAutoRelay`")
}
return &relayFinder{
bootTime: conf.clock.Now(),
host: host,
conf: conf,
peerSource: peerSource,
candidates: make(map[peer.ID]*candidate),
backoff: make(map[peer.ID]time.Time),
candidateFound: make(chan struct{}, 1),
maybeConnectToRelayTrigger: make(chan struct{}, 1),
maybeRequestNewCandidates: make(chan struct{}, 1),
triggerRunScheduledWork: make(chan struct{}, 1),
relays: make(map[peer.ID]*circuitv2.Reservation),
relayUpdated: make(chan struct{}, 1),
metricsTracer: &wrappedMetricsTracer{conf.metricsTracer},
}
}
type scheduledWorkTimes struct {
leastFrequentInterval time.Duration
nextRefresh time.Time
nextBackoff time.Time
nextOldCandidateCheck time.Time
nextAllowedCallToPeerSource time.Time
}
func (rf *relayFinder) background(ctx context.Context) {
peerSourceRateLimiter := make(chan struct{}, 1)
rf.refCount.Add(1)
go func() {
defer rf.refCount.Done()
rf.findNodes(ctx, peerSourceRateLimiter)
}()
rf.refCount.Add(1)
go func() {
defer rf.refCount.Done()
rf.handleNewCandidates(ctx)
}()
subConnectedness, err := rf.host.EventBus().Subscribe(new(event.EvtPeerConnectednessChanged), eventbus.Name("autorelay (relay finder)"))
if err != nil {
log.Error("failed to subscribe to the EvtPeerConnectednessChanged")
return
}
defer subConnectedness.Close()
now := rf.conf.clock.Now()
bootDelayTimer := rf.conf.clock.InstantTimer(now.Add(rf.conf.bootDelay))
defer bootDelayTimer.Stop()
// This is the least frequent event. It's our fallback timer if we don't have any other work to do.
leastFrequentInterval := rf.conf.minInterval
// Check if leastFrequentInterval is 0 to avoid busy looping
if rf.conf.backoff > leastFrequentInterval || leastFrequentInterval == 0 {
leastFrequentInterval = rf.conf.backoff
}
if rf.conf.maxCandidateAge > leastFrequentInterval || leastFrequentInterval == 0 {
leastFrequentInterval = rf.conf.maxCandidateAge
}
if rsvpRefreshInterval > leastFrequentInterval || leastFrequentInterval == 0 {
leastFrequentInterval = rsvpRefreshInterval
}
scheduledWork := &scheduledWorkTimes{
leastFrequentInterval: leastFrequentInterval,
nextRefresh: now.Add(rsvpRefreshInterval),
nextBackoff: now.Add(rf.conf.backoff),
nextOldCandidateCheck: now.Add(rf.conf.maxCandidateAge),
nextAllowedCallToPeerSource: now.Add(-time.Second), // allow immediately
}
workTimer := rf.conf.clock.InstantTimer(rf.runScheduledWork(ctx, now, scheduledWork, peerSourceRateLimiter))
defer workTimer.Stop()
for {
select {
case ev, ok := <-subConnectedness.Out():
if !ok {
return
}
evt := ev.(event.EvtPeerConnectednessChanged)
if evt.Connectedness != network.NotConnected {
continue
}
push := false
rf.relayMx.Lock()
if rf.usingRelay(evt.Peer) { // we were disconnected from a relay
log.Debugw("disconnected from relay", "id", evt.Peer)
delete(rf.relays, evt.Peer)
rf.notifyMaybeConnectToRelay()
rf.notifyMaybeNeedNewCandidates()
push = true
}
rf.relayMx.Unlock()
if push {
rf.clearCachedAddrsAndSignalAddressChange()
rf.metricsTracer.ReservationEnded(1)
}
case <-rf.candidateFound:
rf.notifyMaybeConnectToRelay()
case <-bootDelayTimer.Ch():
rf.notifyMaybeConnectToRelay()
case <-rf.relayUpdated:
rf.clearCachedAddrsAndSignalAddressChange()
case now := <-workTimer.Ch():
// Note: `now` is not guaranteed to be the current time. It's the time
// that the timer was fired. This is okay because we'll schedule
// future work at a specific time.
nextTime := rf.runScheduledWork(ctx, now, scheduledWork, peerSourceRateLimiter)
workTimer.Reset(nextTime)
case <-rf.triggerRunScheduledWork:
// Ignore the next time because we aren't scheduling any future work here
_ = rf.runScheduledWork(ctx, rf.conf.clock.Now(), scheduledWork, peerSourceRateLimiter)
case <-ctx.Done():
return
}
}
}
func (rf *relayFinder) clearCachedAddrsAndSignalAddressChange() {
rf.relayMx.Lock()
rf.cachedAddrs = nil
rf.relayMx.Unlock()
rf.host.SignalAddressChange()
rf.metricsTracer.RelayAddressUpdated()
}
func (rf *relayFinder) runScheduledWork(ctx context.Context, now time.Time, scheduledWork *scheduledWorkTimes, peerSourceRateLimiter chan<- struct{}) time.Time {
nextTime := now.Add(scheduledWork.leastFrequentInterval)
if now.After(scheduledWork.nextRefresh) {
scheduledWork.nextRefresh = now.Add(rsvpRefreshInterval)
if rf.refreshReservations(ctx, now) {
rf.clearCachedAddrsAndSignalAddressChange()
}
}
if now.After(scheduledWork.nextBackoff) {
scheduledWork.nextBackoff = rf.clearBackoff(now)
}
if now.After(scheduledWork.nextOldCandidateCheck) {
scheduledWork.nextOldCandidateCheck = rf.clearOldCandidates(now)
}
if now.After(scheduledWork.nextAllowedCallToPeerSource) {
select {
case peerSourceRateLimiter <- struct{}{}:
scheduledWork.nextAllowedCallToPeerSource = now.Add(rf.conf.minInterval)
if scheduledWork.nextAllowedCallToPeerSource.Before(nextTime) {
nextTime = scheduledWork.nextAllowedCallToPeerSource
}
default:
}
} else {
// We still need to schedule this work if it's sooner than nextTime
if scheduledWork.nextAllowedCallToPeerSource.Before(nextTime) {
nextTime = scheduledWork.nextAllowedCallToPeerSource
}
}
// Find the next time we need to run scheduled work.
if scheduledWork.nextRefresh.Before(nextTime) {
nextTime = scheduledWork.nextRefresh
}
if scheduledWork.nextBackoff.Before(nextTime) {
nextTime = scheduledWork.nextBackoff
}
if scheduledWork.nextOldCandidateCheck.Before(nextTime) {
nextTime = scheduledWork.nextOldCandidateCheck
}
if nextTime == now {
// Only happens in CI with a mock clock
nextTime = nextTime.Add(1) // avoids an infinite loop
}
rf.metricsTracer.ScheduledWorkUpdated(scheduledWork)
return nextTime
}
// clearOldCandidates clears old candidates from the map. Returns the next time
// to run this function.
func (rf *relayFinder) clearOldCandidates(now time.Time) time.Time {
// If we don't have any candidates, we should run this again in rf.conf.maxCandidateAge.
nextTime := now.Add(rf.conf.maxCandidateAge)
var deleted bool
rf.candidateMx.Lock()
defer rf.candidateMx.Unlock()
for id, cand := range rf.candidates {
expiry := cand.added.Add(rf.conf.maxCandidateAge)
if expiry.After(now) {
if expiry.Before(nextTime) {
nextTime = expiry
}
} else {
log.Debugw("deleting candidate due to age", "id", id)
deleted = true
rf.removeCandidate(id)
}
}
if deleted {
rf.notifyMaybeNeedNewCandidates()
}
return nextTime
}
// clearBackoff clears old backoff entries from the map. Returns the next time
// to run this function.
func (rf *relayFinder) clearBackoff(now time.Time) time.Time {
nextTime := now.Add(rf.conf.backoff)
rf.candidateMx.Lock()
defer rf.candidateMx.Unlock()
for id, t := range rf.backoff {
expiry := t.Add(rf.conf.backoff)
if expiry.After(now) {
if expiry.Before(nextTime) {
nextTime = expiry
}
} else {
log.Debugw("removing backoff for node", "id", id)
delete(rf.backoff, id)
}
}
return nextTime
}
// findNodes accepts nodes from the channel and tests if they support relaying.
// It is run on both public and private nodes.
// It garbage collects old entries, so that the set of nodes doesn't overflow.
// This makes sure that as soon as we need to find relay candidates, we have them available.
// peerSourceRateLimiter is used to limit how often we call the peer source.
func (rf *relayFinder) findNodes(ctx context.Context, peerSourceRateLimiter <-chan struct{}) {
var peerChan <-chan peer.AddrInfo
var wg sync.WaitGroup
for {
rf.candidateMx.Lock()
numCandidates := len(rf.candidates)
rf.candidateMx.Unlock()
if peerChan == nil && numCandidates < rf.conf.minCandidates {
rf.metricsTracer.CandidateLoopState(peerSourceRateLimited)
select {
case <-peerSourceRateLimiter:
peerChan = rf.peerSource(ctx, rf.conf.maxCandidates)
select {
case rf.triggerRunScheduledWork <- struct{}{}:
default:
}
case <-ctx.Done():
return
}
}
if peerChan == nil {
rf.metricsTracer.CandidateLoopState(waitingForTrigger)
} else {
rf.metricsTracer.CandidateLoopState(waitingOnPeerChan)
}
select {
case <-rf.maybeRequestNewCandidates:
continue
case pi, ok := <-peerChan:
if !ok {
wg.Wait()
peerChan = nil
continue
}
log.Debugw("found node", "id", pi.ID)
rf.candidateMx.Lock()
numCandidates := len(rf.candidates)
backoffStart, isOnBackoff := rf.backoff[pi.ID]
rf.candidateMx.Unlock()
if isOnBackoff {
log.Debugw("skipping node that we recently failed to obtain a reservation with", "id", pi.ID, "last attempt", rf.conf.clock.Since(backoffStart))
continue
}
if numCandidates >= rf.conf.maxCandidates {
log.Debugw("skipping node. Already have enough candidates", "id", pi.ID, "num", numCandidates, "max", rf.conf.maxCandidates)
continue
}
rf.refCount.Add(1)
wg.Add(1)
go func() {
defer rf.refCount.Done()
defer wg.Done()
if added := rf.handleNewNode(ctx, pi); added {
rf.notifyNewCandidate()
}
}()
case <-ctx.Done():
rf.metricsTracer.CandidateLoopState(stopped)
return
}
}
}
func (rf *relayFinder) notifyMaybeConnectToRelay() {
select {
case rf.maybeConnectToRelayTrigger <- struct{}{}:
default:
}
}
func (rf *relayFinder) notifyMaybeNeedNewCandidates() {
select {
case rf.maybeRequestNewCandidates <- struct{}{}:
default:
}
}
func (rf *relayFinder) notifyNewCandidate() {
select {
case rf.candidateFound <- struct{}{}:
default:
}
}
// handleNewNode tests if a peer supports circuit v2.
// This method is only run on private nodes.
// If a peer does, it is added to the candidates map.
// Note that just supporting the protocol doesn't guarantee that we can also obtain a reservation.
func (rf *relayFinder) handleNewNode(ctx context.Context, pi peer.AddrInfo) (added bool) {
rf.relayMx.Lock()
relayInUse := rf.usingRelay(pi.ID)
rf.relayMx.Unlock()
if relayInUse {
return false
}
ctx, cancel := context.WithTimeout(ctx, 20*time.Second)
defer cancel()
supportsV2, err := rf.tryNode(ctx, pi)
if err != nil {
log.Debugf("node %s not accepted as a candidate: %s", pi.ID, err)
if err == errProtocolNotSupported {
rf.metricsTracer.CandidateChecked(false)
}
return false
}
rf.metricsTracer.CandidateChecked(true)
rf.candidateMx.Lock()
if len(rf.candidates) > rf.conf.maxCandidates {
rf.candidateMx.Unlock()
return false
}
log.Debugw("node supports relay protocol", "peer", pi.ID, "supports circuit v2", supportsV2)
rf.addCandidate(&candidate{
added: rf.conf.clock.Now(),
ai: pi,
supportsRelayV2: supportsV2,
})
rf.candidateMx.Unlock()
return true
}
var errProtocolNotSupported = errors.New("doesn't speak circuit v2")
// tryNode checks if a peer actually supports circuit v2.
// It does not modify any internal state.
func (rf *relayFinder) tryNode(ctx context.Context, pi peer.AddrInfo) (supportsRelayV2 bool, err error) {
if err := rf.host.Connect(ctx, pi); err != nil {
return false, fmt.Errorf("error connecting to relay %s: %w", pi.ID, err)
}
conns := rf.host.Network().ConnsToPeer(pi.ID)
for _, conn := range conns {
if isRelayAddr(conn.RemoteMultiaddr()) {
return false, errors.New("not a public node")
}
}
// wait for identify to complete in at least one conn so that we can check the supported protocols
ready := make(chan struct{}, 1)
for _, conn := range conns {
go func(conn network.Conn) {
select {
case <-rf.host.IDService().IdentifyWait(conn):
select {
case ready <- struct{}{}:
default:
}
case <-ctx.Done():
}
}(conn)
}
select {
case <-ready:
case <-ctx.Done():
return false, ctx.Err()
}
protos, err := rf.host.Peerstore().SupportsProtocols(pi.ID, protoIDv2)
if err != nil {
return false, fmt.Errorf("error checking relay protocol support for peer %s: %w", pi.ID, err)
}
if len(protos) == 0 {
return false, errProtocolNotSupported
}
return true, nil
}
// When a new node that could be a relay is found, we receive a notification on the maybeConnectToRelayTrigger chan.
// This function makes sure that we only run one instance of maybeConnectToRelay at once, and buffers
// exactly one more trigger event to run maybeConnectToRelay.
func (rf *relayFinder) handleNewCandidates(ctx context.Context) {
for {
select {
case <-ctx.Done():
return
case <-rf.maybeConnectToRelayTrigger:
rf.maybeConnectToRelay(ctx)
}
}
}
func (rf *relayFinder) maybeConnectToRelay(ctx context.Context) {
rf.relayMx.Lock()
numRelays := len(rf.relays)
rf.relayMx.Unlock()
// We're already connected to our desired number of relays. Nothing to do here.
if numRelays == rf.conf.desiredRelays {
return
}
rf.candidateMx.Lock()
if len(rf.relays) == 0 && len(rf.candidates) < rf.conf.minCandidates && rf.conf.clock.Since(rf.bootTime) < rf.conf.bootDelay {
// During the startup phase, we don't want to connect to the first candidate that we find.
// Instead, we wait until we've found at least minCandidates, and then select the best of those.
// However, if that takes too long (longer than bootDelay), we still go ahead.
rf.candidateMx.Unlock()
return
}
if len(rf.candidates) == 0 {
rf.candidateMx.Unlock()
return
}
candidates := rf.selectCandidates()
rf.candidateMx.Unlock()
// We now iterate over the candidates, attempting (sequentially) to get reservations with them, until
// we reach the desired number of relays.
for _, cand := range candidates {
id := cand.ai.ID
rf.relayMx.Lock()
usingRelay := rf.usingRelay(id)
rf.relayMx.Unlock()
if usingRelay {
rf.candidateMx.Lock()
rf.removeCandidate(id)
rf.candidateMx.Unlock()
rf.notifyMaybeNeedNewCandidates()
continue
}
rsvp, err := rf.connectToRelay(ctx, cand)
if err != nil {
log.Debugw("failed to connect to relay", "peer", id, "error", err)
rf.notifyMaybeNeedNewCandidates()
rf.metricsTracer.ReservationRequestFinished(false, err)
continue
}
log.Debugw("adding new relay", "id", id)
rf.relayMx.Lock()
rf.relays[id] = rsvp
numRelays := len(rf.relays)
rf.relayMx.Unlock()
rf.notifyMaybeNeedNewCandidates()
rf.host.ConnManager().Protect(id, autorelayTag) // protect the connection
select {
case rf.relayUpdated <- struct{}{}:
default:
}
rf.metricsTracer.ReservationRequestFinished(false, nil)
if numRelays >= rf.conf.desiredRelays {
break
}
}
}
func (rf *relayFinder) connectToRelay(ctx context.Context, cand *candidate) (*circuitv2.Reservation, error) {
id := cand.ai.ID
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
var rsvp *circuitv2.Reservation
// make sure we're still connected.
if rf.host.Network().Connectedness(id) != network.Connected {
if err := rf.host.Connect(ctx, cand.ai); err != nil {
rf.candidateMx.Lock()
rf.removeCandidate(cand.ai.ID)
rf.candidateMx.Unlock()
return nil, fmt.Errorf("failed to connect: %w", err)
}
}
rf.candidateMx.Lock()
rf.backoff[id] = rf.conf.clock.Now()
rf.candidateMx.Unlock()
var err error
if cand.supportsRelayV2 {
rsvp, err = circuitv2.Reserve(ctx, rf.host, cand.ai)
if err != nil {
err = fmt.Errorf("failed to reserve slot: %w", err)
}
}
rf.candidateMx.Lock()
rf.removeCandidate(id)
rf.candidateMx.Unlock()
return rsvp, err
}
func (rf *relayFinder) refreshReservations(ctx context.Context, now time.Time) bool {
rf.relayMx.Lock()
// find reservations about to expire and refresh them in parallel
g := new(errgroup.Group)
for p, rsvp := range rf.relays {
if now.Add(rsvpExpirationSlack).Before(rsvp.Expiration) {
continue
}
p := p
g.Go(func() error {
err := rf.refreshRelayReservation(ctx, p)
rf.metricsTracer.ReservationRequestFinished(true, err)
return err
})
}
rf.relayMx.Unlock()
err := g.Wait()
return err != nil
}
func (rf *relayFinder) refreshRelayReservation(ctx context.Context, p peer.ID) error {
rsvp, err := circuitv2.Reserve(ctx, rf.host, peer.AddrInfo{ID: p})
rf.relayMx.Lock()
if err != nil {
log.Debugw("failed to refresh relay slot reservation", "relay", p, "error", err)
_, exists := rf.relays[p]
delete(rf.relays, p)
// unprotect the connection
rf.host.ConnManager().Unprotect(p, autorelayTag)
rf.relayMx.Unlock()
if exists {
rf.metricsTracer.ReservationEnded(1)
}
return err
}
log.Debugw("refreshed relay slot reservation", "relay", p)
rf.relays[p] = rsvp
rf.relayMx.Unlock()
return nil
}
// usingRelay returns if we're currently using the given relay.
func (rf *relayFinder) usingRelay(p peer.ID) bool {
_, ok := rf.relays[p]
return ok
}
// addCandidate adds a candidate to the candidates set. Assumes the caller holds the candidateMx mutex.
func (rf *relayFinder) addCandidate(cand *candidate) {
_, exists := rf.candidates[cand.ai.ID]
rf.candidates[cand.ai.ID] = cand
if !exists {
rf.metricsTracer.CandidateAdded(1)
}
}
func (rf *relayFinder) removeCandidate(id peer.ID) {
_, exists := rf.candidates[id]
if exists {
delete(rf.candidates, id)
rf.metricsTracer.CandidateRemoved(1)
}
}
// selectCandidates returns an ordered slice of relay candidates.
// Callers should attempt to obtain reservations with the candidates in this order.
func (rf *relayFinder) selectCandidates() []*candidate {
now := rf.conf.clock.Now()
candidates := make([]*candidate, 0, len(rf.candidates))
for _, cand := range rf.candidates {
if cand.added.Add(rf.conf.maxCandidateAge).After(now) {
candidates = append(candidates, cand)
}
}
// TODO: better relay selection strategy; this just selects random relays,
// but we should probably use ping latency as the selection metric
rand.Shuffle(len(candidates), func(i, j int) {
candidates[i], candidates[j] = candidates[j], candidates[i]
})
return candidates
}
// This function computes the NATed relay addrs when our status is private:
// - The public addrs are removed from the address set.
// - The non-public addrs are included verbatim so that peers behind the same NAT/firewall
// can still dial us directly.
// - On top of those, we add the relay-specific addrs for the relays to which we are
// connected. For each non-private relay addr, we encapsulate the p2p-circuit addr
// through which we can be dialed.
func (rf *relayFinder) relayAddrs(addrs []ma.Multiaddr) []ma.Multiaddr {
rf.relayMx.Lock()
defer rf.relayMx.Unlock()
if rf.cachedAddrs != nil && rf.conf.clock.Now().Before(rf.cachedAddrsExpiry) {
return rf.cachedAddrs
}
raddrs := make([]ma.Multiaddr, 0, 4*len(rf.relays)+4)
// only keep private addrs from the original addr set
for _, addr := range addrs {
if manet.IsPrivateAddr(addr) {
raddrs = append(raddrs, addr)
}
}
// add relay specific addrs to the list
relayAddrCnt := 0
for p := range rf.relays {
addrs := cleanupAddressSet(rf.host.Peerstore().Addrs(p))
relayAddrCnt += len(addrs)
circuit := ma.StringCast(fmt.Sprintf("/p2p/%s/p2p-circuit", p))
for _, addr := range addrs {
pub := addr.Encapsulate(circuit)
raddrs = append(raddrs, pub)
}
}
rf.cachedAddrs = raddrs
rf.cachedAddrsExpiry = rf.conf.clock.Now().Add(30 * time.Second)
rf.metricsTracer.RelayAddressCount(relayAddrCnt)
return raddrs
}
func (rf *relayFinder) Start() error {
rf.ctxCancelMx.Lock()
defer rf.ctxCancelMx.Unlock()
if rf.ctxCancel != nil {
return errAlreadyRunning
}
log.Debug("starting relay finder")
rf.initMetrics()
ctx, cancel := context.WithCancel(context.Background())
rf.ctxCancel = cancel
rf.refCount.Add(1)
go func() {
defer rf.refCount.Done()
rf.background(ctx)
}()
return nil
}
func (rf *relayFinder) Stop() error {
rf.ctxCancelMx.Lock()
defer rf.ctxCancelMx.Unlock()
log.Debug("stopping relay finder")
if rf.ctxCancel != nil {
rf.ctxCancel()
}
rf.refCount.Wait()
rf.ctxCancel = nil
rf.resetMetrics()
return nil
}
func (rf *relayFinder) initMetrics() {
rf.metricsTracer.DesiredReservations(rf.conf.desiredRelays)
rf.relayMx.Lock()
rf.metricsTracer.ReservationOpened(len(rf.relays))
rf.relayMx.Unlock()
rf.candidateMx.Lock()
rf.metricsTracer.CandidateAdded(len(rf.candidates))
rf.candidateMx.Unlock()
}
func (rf *relayFinder) resetMetrics() {
rf.relayMx.Lock()
rf.metricsTracer.ReservationEnded(len(rf.relays))
rf.relayMx.Unlock()
rf.candidateMx.Lock()
rf.metricsTracer.CandidateRemoved(len(rf.candidates))
rf.candidateMx.Unlock()
rf.metricsTracer.RelayAddressCount(0)
rf.metricsTracer.ScheduledWorkUpdated(&scheduledWorkTimes{})
}
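
To illustrate what relayAddrs produces, the snippet below performs the same encapsulation as the loop above, using a throwaway key to stand in for a relay's peer ID and a made-up public address:

package main

import (
	"crypto/rand"
	"fmt"

	"github.com/libp2p/go-libp2p/core/crypto"
	"github.com/libp2p/go-libp2p/core/peer"
	ma "github.com/multiformats/go-multiaddr"
)

func main() {
	// A throwaway peer ID standing in for a relay we hold a reservation with.
	priv, _, err := crypto.GenerateEd25519Key(rand.Reader)
	if err != nil {
		panic(err)
	}
	relayID, err := peer.IDFromPrivateKey(priv)
	if err != nil {
		panic(err)
	}

	// A made-up public address of the relay.
	relayAddr := ma.StringCast("/ip4/1.2.3.4/tcp/4001")

	// Same construction as relayFinder.relayAddrs: we advertise ourselves as
	// reachable through the relay's circuit.
	circuit := ma.StringCast(fmt.Sprintf("/p2p/%s/p2p-circuit", relayID))
	fmt.Println(relayAddr.Encapsulate(circuit))
	// Prints something like /ip4/1.2.3.4/tcp/4001/p2p/<relay-id>/p2p-circuit
}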