Integrate BACKBEAT SDK and resolve KACHING license validation

Major integrations and fixes:
- Added BACKBEAT SDK integration for P2P operation timing
- Implemented beat-aware status tracking for distributed operations
- Added Docker secrets support for secure license management
- Resolved KACHING license validation via HTTPS/TLS
- Updated docker-compose configuration for clean stack deployment
- Disabled rollback policies to prevent deployment failures
- Added license credential storage (CHORUS-DEV-MULTI-001)

Technical improvements:
- BACKBEAT P2P operation tracking with phase management
- Enhanced configuration system with file-based secrets (see the sketch below)
- Improved error handling for license validation
- Clean separation of KACHING and CHORUS deployment stacks
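
A minimal sketch of the file-based secret loading described above. The names (LICENSE_KEY, the _FILE convention, /run/secrets paths) are illustrative only, not the actual CHORUS configuration keys:

package config

import (
	"os"
	"strings"
)

// loadSecret returns a credential, preferring a Docker secret file
// (conventionally pointed at by <NAME>_FILE, e.g. /run/secrets/license_key)
// over a plain environment variable. Names and paths are illustrative.
func loadSecret(name string) (string, error) {
	if path := os.Getenv(name + "_FILE"); path != "" {
		b, err := os.ReadFile(path)
		if err != nil {
			return "", err
		}
		return strings.TrimSpace(string(b)), nil
	}
	return os.Getenv(name), nil
}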

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
anthonyrawlins
2025-09-06 07:56:26 +10:00
parent 543ab216f9
commit 9bdcbe0447
4730 changed files with 1480093 additions and 1916 deletions


@@ -0,0 +1,165 @@
package autorelay
import (
"encoding/binary"
ma "github.com/multiformats/go-multiaddr"
manet "github.com/multiformats/go-multiaddr/net"
)
// This function cleans up a relay's address set to remove private addresses and curtail
// addrsplosion.
func cleanupAddressSet(addrs []ma.Multiaddr) []ma.Multiaddr {
var public, private []ma.Multiaddr
for _, a := range addrs {
if isRelayAddr(a) {
continue
}
if manet.IsPublicAddr(a) || isDNSAddr(a) {
public = append(public, a)
continue
}
// discard unroutable addrs
if manet.IsPrivateAddr(a) {
private = append(private, a)
}
}
if !hasAddrsplosion(public) {
return public
}
return sanitizeAddrsplodedSet(public, private)
}
func isRelayAddr(a ma.Multiaddr) bool {
isRelay := false
ma.ForEach(a, func(c ma.Component) bool {
switch c.Protocol().Code {
case ma.P_CIRCUIT:
isRelay = true
return false
default:
return true
}
})
return isRelay
}
func isDNSAddr(a ma.Multiaddr) bool {
if first, _ := ma.SplitFirst(a); first != nil {
switch first.Protocol().Code {
case ma.P_DNS4, ma.P_DNS6, ma.P_DNSADDR:
return true
}
}
return false
}
// we have addrsplosion if for some protocol we advertise multiple ports on
// the same base address.
func hasAddrsplosion(addrs []ma.Multiaddr) bool {
aset := make(map[string]int)
for _, a := range addrs {
key, port := addrKeyAndPort(a)
xport, ok := aset[key]
if ok && port != xport {
return true
}
aset[key] = port
}
return false
}
func addrKeyAndPort(a ma.Multiaddr) (string, int) {
var (
key string
port int
)
ma.ForEach(a, func(c ma.Component) bool {
switch c.Protocol().Code {
case ma.P_TCP, ma.P_UDP:
port = int(binary.BigEndian.Uint16(c.RawValue()))
key += "/" + c.Protocol().Name
default:
val := c.Value()
if val == "" {
val = c.Protocol().Name
}
key += "/" + val
}
return true
})
return key, port
}
// clean up addrsplosion
// the following heuristic is used:
// - for each base address/protocol combination, if there are multiple ports advertised then
// only accept the default port if present.
// - If the default port is not present, we check for non-standard ports by tracking
// private port bindings if present.
// - If there is no default or private port binding, then we can't infer the correct
// port and give up and return all addrs (for that base address)
func sanitizeAddrsplodedSet(public, private []ma.Multiaddr) []ma.Multiaddr {
type portAndAddr struct {
addr ma.Multiaddr
port int
}
privports := make(map[int]struct{})
pubaddrs := make(map[string][]portAndAddr)
for _, a := range private {
_, port := addrKeyAndPort(a)
privports[port] = struct{}{}
}
for _, a := range public {
key, port := addrKeyAndPort(a)
pubaddrs[key] = append(pubaddrs[key], portAndAddr{addr: a, port: port})
}
var result []ma.Multiaddr
for _, pas := range pubaddrs {
if len(pas) == 1 {
// it's not addrsploded
result = append(result, pas[0].addr)
continue
}
haveAddr := false
for _, pa := range pas {
if _, ok := privports[pa.port]; ok {
// it matches a privately bound port, use it
result = append(result, pa.addr)
haveAddr = true
continue
}
if pa.port == 4001 || pa.port == 4002 {
// it's a default port, use it
result = append(result, pa.addr)
haveAddr = true
}
}
if !haveAddr {
// we weren't able to select a port; bite the bullet and use them all
for _, pa := range pas {
result = append(result, pa.addr)
}
}
}
return result
}
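
To make the heuristic above concrete, here is a sketch of exercising it from inside the package (e.g. in a test file); the addresses are made up, with 4001 as a default port the sanitizer prefers and a private binding hinting at the non-standard port:

package autorelay

import (
	"testing"

	ma "github.com/multiformats/go-multiaddr"
)

func TestCleanupAddressSetSketch(t *testing.T) {
	addrs := []ma.Multiaddr{
		// Two ports advertised on the same public base address: addrsplosion.
		ma.StringCast("/ip4/1.2.3.4/tcp/4001"),
		ma.StringCast("/ip4/1.2.3.4/tcp/43123"),
		// A private address whose port matches the non-default public one.
		ma.StringCast("/ip4/192.168.1.10/tcp/43123"),
	}
	cleaned := cleanupAddressSet(addrs)
	// Both public addresses survive (default port + privately bound port);
	// the private address itself is dropped.
	if len(cleaned) != 2 {
		t.Fatalf("expected 2 addrs, got %d: %v", len(cleaned), cleaned)
	}
}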


@@ -0,0 +1,125 @@
package autorelay
import (
"context"
"errors"
"sync"
"github.com/libp2p/go-libp2p/core/event"
"github.com/libp2p/go-libp2p/core/host"
"github.com/libp2p/go-libp2p/core/network"
basic "github.com/libp2p/go-libp2p/p2p/host/basic"
"github.com/libp2p/go-libp2p/p2p/host/eventbus"
logging "github.com/ipfs/go-log/v2"
ma "github.com/multiformats/go-multiaddr"
)
var log = logging.Logger("autorelay")
type AutoRelay struct {
refCount sync.WaitGroup
ctx context.Context
ctxCancel context.CancelFunc
conf *config
mx sync.Mutex
status network.Reachability
relayFinder *relayFinder
host host.Host
addrsF basic.AddrsFactory
metricsTracer MetricsTracer
}
func NewAutoRelay(bhost *basic.BasicHost, opts ...Option) (*AutoRelay, error) {
r := &AutoRelay{
host: bhost,
addrsF: bhost.AddrsFactory,
status: network.ReachabilityUnknown,
}
conf := defaultConfig
for _, opt := range opts {
if err := opt(&conf); err != nil {
return nil, err
}
}
r.ctx, r.ctxCancel = context.WithCancel(context.Background())
r.conf = &conf
r.relayFinder = newRelayFinder(bhost, conf.peerSource, &conf)
r.metricsTracer = &wrappedMetricsTracer{conf.metricsTracer}
bhost.AddrsFactory = r.hostAddrs
return r, nil
}
func (r *AutoRelay) Start() {
r.refCount.Add(1)
go func() {
defer r.refCount.Done()
r.background()
}()
}
func (r *AutoRelay) background() {
subReachability, err := r.host.EventBus().Subscribe(new(event.EvtLocalReachabilityChanged), eventbus.Name("autorelay (background)"))
if err != nil {
log.Debug("failed to subscribe to the EvtLocalReachabilityChanged")
return
}
defer subReachability.Close()
for {
select {
case <-r.ctx.Done():
return
case ev, ok := <-subReachability.Out():
if !ok {
return
}
// TODO: push changed addresses
evt := ev.(event.EvtLocalReachabilityChanged)
switch evt.Reachability {
case network.ReachabilityPrivate, network.ReachabilityUnknown:
err := r.relayFinder.Start()
if errors.Is(err, errAlreadyRunning) {
log.Debug("tried to start already running relay finder")
} else if err != nil {
log.Errorw("failed to start relay finder", "error", err)
} else {
r.metricsTracer.RelayFinderStatus(true)
}
case network.ReachabilityPublic:
r.relayFinder.Stop()
r.metricsTracer.RelayFinderStatus(false)
}
r.mx.Lock()
r.status = evt.Reachability
r.mx.Unlock()
}
}
}
func (r *AutoRelay) hostAddrs(addrs []ma.Multiaddr) []ma.Multiaddr {
return r.relayAddrs(r.addrsF(addrs))
}
func (r *AutoRelay) relayAddrs(addrs []ma.Multiaddr) []ma.Multiaddr {
r.mx.Lock()
defer r.mx.Unlock()
if r.status != network.ReachabilityPrivate {
return addrs
}
return r.relayFinder.relayAddrs(addrs)
}
func (r *AutoRelay) Close() error {
r.ctxCancel()
err := r.relayFinder.Stop()
r.refCount.Wait()
return err
}
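
For context, applications normally don't construct AutoRelay directly; it is wired in through go-libp2p constructor options. A minimal sketch, assuming the standard libp2p.EnableAutoRelayWithStaticRelays option and relay multiaddrs supplied on the command line:

package main

import (
	"fmt"
	"log"
	"os"

	"github.com/libp2p/go-libp2p"
	"github.com/libp2p/go-libp2p/core/peer"
)

func main() {
	// Expect one or more full relay multiaddrs (including /p2p/<peer-id>) as arguments.
	var relays []peer.AddrInfo
	for _, s := range os.Args[1:] {
		ai, err := peer.AddrInfoFromString(s)
		if err != nil {
			log.Fatalf("bad relay address %q: %v", s, err)
		}
		relays = append(relays, *ai)
	}

	// EnableAutoRelayWithStaticRelays installs the AutoRelay defined above,
	// using the given peers as its only relay candidates.
	h, err := libp2p.New(libp2p.EnableAutoRelayWithStaticRelays(relays))
	if err != nil {
		log.Fatal(err)
	}
	defer h.Close()

	fmt.Println("host ID:", h.ID())
	fmt.Println("advertised addrs:", h.Addrs())
}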


@@ -0,0 +1,23 @@
package autorelay
import (
"github.com/libp2p/go-libp2p/core/host"
)
type AutoRelayHost struct {
host.Host
ar *AutoRelay
}
func (h *AutoRelayHost) Close() error {
_ = h.ar.Close()
return h.Host.Close()
}
func (h *AutoRelayHost) Start() {
h.ar.Start()
}
func NewAutoRelayHost(h host.Host, ar *AutoRelay) *AutoRelayHost {
return &AutoRelayHost{Host: h, ar: ar}
}


@@ -0,0 +1,373 @@
package autorelay
import (
"errors"
"github.com/libp2p/go-libp2p/p2p/metricshelper"
"github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/client"
pbv2 "github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/pb"
"github.com/prometheus/client_golang/prometheus"
)
const metricNamespace = "libp2p_autorelay"
var (
status = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: "status",
Help: "relay finder active",
})
reservationsOpenedTotal = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: "reservations_opened_total",
Help: "Reservations Opened",
},
)
reservationsClosedTotal = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: "reservations_closed_total",
Help: "Reservations Closed",
},
)
reservationRequestsOutcomeTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: "reservation_requests_outcome_total",
Help: "Reservation Request Outcome",
},
[]string{"request_type", "outcome"},
)
relayAddressesUpdatedTotal = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: "relay_addresses_updated_total",
Help: "Relay Addresses Updated Count",
},
)
relayAddressesCount = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: "relay_addresses_count",
Help: "Relay Addresses Count",
},
)
candidatesCircuitV2SupportTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: "candidates_circuit_v2_support_total",
Help: "Candidiates supporting circuit v2",
},
[]string{"support"},
)
candidatesTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: "candidates_total",
Help: "Candidates Total",
},
[]string{"type"},
)
candLoopState = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: "candidate_loop_state",
Help: "Candidate Loop State",
},
)
scheduledWorkTime = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: "scheduled_work_time",
Help: "Scheduled Work Times",
},
[]string{"work_type"},
)
desiredReservations = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: "desired_reservations",
Help: "Desired Reservations",
},
)
collectors = []prometheus.Collector{
status,
reservationsOpenedTotal,
reservationsClosedTotal,
reservationRequestsOutcomeTotal,
relayAddressesUpdatedTotal,
relayAddressesCount,
candidatesCircuitV2SupportTotal,
candidatesTotal,
candLoopState,
scheduledWorkTime,
desiredReservations,
}
)
type candidateLoopState int
const (
peerSourceRateLimited candidateLoopState = iota
waitingOnPeerChan
waitingForTrigger
stopped
)
// MetricsTracer is the interface for tracking metrics for autorelay
type MetricsTracer interface {
RelayFinderStatus(isActive bool)
ReservationEnded(cnt int)
ReservationOpened(cnt int)
ReservationRequestFinished(isRefresh bool, err error)
RelayAddressCount(int)
RelayAddressUpdated()
CandidateChecked(supportsCircuitV2 bool)
CandidateAdded(cnt int)
CandidateRemoved(cnt int)
CandidateLoopState(state candidateLoopState)
ScheduledWorkUpdated(scheduledWork *scheduledWorkTimes)
DesiredReservations(int)
}
type metricsTracer struct{}
var _ MetricsTracer = &metricsTracer{}
type metricsTracerSetting struct {
reg prometheus.Registerer
}
type MetricsTracerOption func(*metricsTracerSetting)
func WithRegisterer(reg prometheus.Registerer) MetricsTracerOption {
return func(s *metricsTracerSetting) {
if reg != nil {
s.reg = reg
}
}
}
func NewMetricsTracer(opts ...MetricsTracerOption) MetricsTracer {
setting := &metricsTracerSetting{reg: prometheus.DefaultRegisterer}
for _, opt := range opts {
opt(setting)
}
metricshelper.RegisterCollectors(setting.reg, collectors...)
// Initialise these counters to 0 otherwise the first reservation requests aren't handled
// correctly when using the promql increase function
reservationRequestsOutcomeTotal.WithLabelValues("refresh", "success")
reservationRequestsOutcomeTotal.WithLabelValues("new", "success")
candidatesCircuitV2SupportTotal.WithLabelValues("yes")
candidatesCircuitV2SupportTotal.WithLabelValues("no")
return &metricsTracer{}
}
func (mt *metricsTracer) RelayFinderStatus(isActive bool) {
if isActive {
status.Set(1)
} else {
status.Set(0)
}
}
func (mt *metricsTracer) ReservationEnded(cnt int) {
reservationsClosedTotal.Add(float64(cnt))
}
func (mt *metricsTracer) ReservationOpened(cnt int) {
reservationsOpenedTotal.Add(float64(cnt))
}
func (mt *metricsTracer) ReservationRequestFinished(isRefresh bool, err error) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
if isRefresh {
*tags = append(*tags, "refresh")
} else {
*tags = append(*tags, "new")
}
*tags = append(*tags, getReservationRequestStatus(err))
reservationRequestsOutcomeTotal.WithLabelValues(*tags...).Inc()
if !isRefresh && err == nil {
reservationsOpenedTotal.Inc()
}
}
func (mt *metricsTracer) RelayAddressUpdated() {
relayAddressesUpdatedTotal.Inc()
}
func (mt *metricsTracer) RelayAddressCount(cnt int) {
relayAddressesCount.Set(float64(cnt))
}
func (mt *metricsTracer) CandidateChecked(supportsCircuitV2 bool) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
if supportsCircuitV2 {
*tags = append(*tags, "yes")
} else {
*tags = append(*tags, "no")
}
candidatesCircuitV2SupportTotal.WithLabelValues(*tags...).Inc()
}
func (mt *metricsTracer) CandidateAdded(cnt int) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
*tags = append(*tags, "added")
candidatesTotal.WithLabelValues(*tags...).Add(float64(cnt))
}
func (mt *metricsTracer) CandidateRemoved(cnt int) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
*tags = append(*tags, "removed")
candidatesTotal.WithLabelValues(*tags...).Add(float64(cnt))
}
func (mt *metricsTracer) CandidateLoopState(state candidateLoopState) {
candLoopState.Set(float64(state))
}
func (mt *metricsTracer) ScheduledWorkUpdated(scheduledWork *scheduledWorkTimes) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
*tags = append(*tags, "allowed peer source call")
scheduledWorkTime.WithLabelValues(*tags...).Set(float64(scheduledWork.nextAllowedCallToPeerSource.Unix()))
*tags = (*tags)[:0]
*tags = append(*tags, "reservation refresh")
scheduledWorkTime.WithLabelValues(*tags...).Set(float64(scheduledWork.nextRefresh.Unix()))
*tags = (*tags)[:0]
*tags = append(*tags, "clear backoff")
scheduledWorkTime.WithLabelValues(*tags...).Set(float64(scheduledWork.nextBackoff.Unix()))
*tags = (*tags)[:0]
*tags = append(*tags, "old candidate check")
scheduledWorkTime.WithLabelValues(*tags...).Set(float64(scheduledWork.nextOldCandidateCheck.Unix()))
}
func (mt *metricsTracer) DesiredReservations(cnt int) {
desiredReservations.Set(float64(cnt))
}
func getReservationRequestStatus(err error) string {
if err == nil {
return "success"
}
status := "err other"
var re client.ReservationError
if errors.As(err, &re) {
switch re.Status {
case pbv2.Status_CONNECTION_FAILED:
return "connection failed"
case pbv2.Status_MALFORMED_MESSAGE:
return "malformed message"
case pbv2.Status_RESERVATION_REFUSED:
return "reservation refused"
case pbv2.Status_PERMISSION_DENIED:
return "permission denied"
case pbv2.Status_RESOURCE_LIMIT_EXCEEDED:
return "resource limit exceeded"
}
}
return status
}
// wrappedMetricsTracer wraps MetricsTracer and ignores all calls when mt is nil
type wrappedMetricsTracer struct {
mt MetricsTracer
}
var _ MetricsTracer = &wrappedMetricsTracer{}
func (mt *wrappedMetricsTracer) RelayFinderStatus(isActive bool) {
if mt.mt != nil {
mt.mt.RelayFinderStatus(isActive)
}
}
func (mt *wrappedMetricsTracer) ReservationEnded(cnt int) {
if mt.mt != nil {
mt.mt.ReservationEnded(cnt)
}
}
func (mt *wrappedMetricsTracer) ReservationOpened(cnt int) {
if mt.mt != nil {
mt.mt.ReservationOpened(cnt)
}
}
func (mt *wrappedMetricsTracer) ReservationRequestFinished(isRefresh bool, err error) {
if mt.mt != nil {
mt.mt.ReservationRequestFinished(isRefresh, err)
}
}
func (mt *wrappedMetricsTracer) RelayAddressUpdated() {
if mt.mt != nil {
mt.mt.RelayAddressUpdated()
}
}
func (mt *wrappedMetricsTracer) RelayAddressCount(cnt int) {
if mt.mt != nil {
mt.mt.RelayAddressCount(cnt)
}
}
func (mt *wrappedMetricsTracer) CandidateChecked(supportsCircuitV2 bool) {
if mt.mt != nil {
mt.mt.CandidateChecked(supportsCircuitV2)
}
}
func (mt *wrappedMetricsTracer) CandidateAdded(cnt int) {
if mt.mt != nil {
mt.mt.CandidateAdded(cnt)
}
}
func (mt *wrappedMetricsTracer) CandidateRemoved(cnt int) {
if mt.mt != nil {
mt.mt.CandidateRemoved(cnt)
}
}
func (mt *wrappedMetricsTracer) ScheduledWorkUpdated(scheduledWork *scheduledWorkTimes) {
if mt.mt != nil {
mt.mt.ScheduledWorkUpdated(scheduledWork)
}
}
func (mt *wrappedMetricsTracer) DesiredReservations(cnt int) {
if mt.mt != nil {
mt.mt.DesiredReservations(cnt)
}
}
func (mt *wrappedMetricsTracer) CandidateLoopState(state candidateLoopState) {
if mt.mt != nil {
mt.mt.CandidateLoopState(state)
}
}
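
A minimal sketch of hooking the tracer above into an application with a dedicated Prometheus registry. The autorelay option names come from the files in this commit; libp2p.EnableAutoRelayWithPeerSource and the promhttp handler are assumed from the usual go-libp2p and Prometheus client APIs:

package main

import (
	"context"
	"log"
	"net/http"

	"github.com/libp2p/go-libp2p"
	"github.com/libp2p/go-libp2p/core/peer"
	"github.com/libp2p/go-libp2p/p2p/host/autorelay"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
	// Register the autorelay collectors on a private registry instead of the default one.
	reg := prometheus.NewRegistry()
	mt := autorelay.NewMetricsTracer(autorelay.WithRegisterer(reg))

	// An empty peer source, just to keep the sketch self-contained.
	noRelays := func(ctx context.Context, num int) <-chan peer.AddrInfo {
		ch := make(chan peer.AddrInfo)
		close(ch)
		return ch
	}

	h, err := libp2p.New(
		libp2p.EnableAutoRelayWithPeerSource(noRelays, autorelay.WithMetricsTracer(mt)),
	)
	if err != nil {
		log.Fatal(err)
	}
	defer h.Close()

	// Expose the registry, e.g. for Prometheus to scrape.
	http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))
	log.Fatal(http.ListenAndServe(":2112", nil))
}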


@@ -0,0 +1,233 @@
package autorelay
import (
"context"
"errors"
"time"
"github.com/libp2p/go-libp2p/core/peer"
)
// AutoRelay will call this function when it needs new candidates because it is
// not connected to the desired number of relays or we get disconnected from one
// of the relays. Implementations must send *at most* numPeers, and close the
// channel when they don't intend to provide any more peers. AutoRelay will not
// call the callback again until the channel is closed. Implementations should
// send new peers, but may send peers they sent before. AutoRelay implements a
// per-peer backoff (see WithBackoff). See WithMinInterval for setting the
// minimum interval between calls to the callback. The context.Context passed
may be canceled when AutoRelay feels satisfied; it will be canceled when the
node is shutting down. If the context is canceled, you MUST close the output
// channel at some point.
type PeerSource func(ctx context.Context, num int) <-chan peer.AddrInfo
type config struct {
clock ClockWithInstantTimer
peerSource PeerSource
// minimum interval used to call the peerSource callback
minInterval time.Duration
// see WithMinCandidates
minCandidates int
// see WithMaxCandidates
maxCandidates int
// Delay until we obtain reservations with relays, if we have less than minCandidates candidates.
// See WithBootDelay.
bootDelay time.Duration
// backoff is the time we wait after failing to obtain a reservation with a candidate
backoff time.Duration
// Number of relays we strive to obtain a reservation with.
desiredRelays int
// see WithMaxCandidateAge
maxCandidateAge time.Duration
setMinCandidates bool
// see WithMetricsTracer
metricsTracer MetricsTracer
}
var defaultConfig = config{
clock: RealClock{},
minCandidates: 4,
maxCandidates: 20,
bootDelay: 3 * time.Minute,
backoff: time.Hour,
desiredRelays: 2,
maxCandidateAge: 30 * time.Minute,
minInterval: 30 * time.Second,
}
var (
errAlreadyHavePeerSource = errors.New("can only use a single WithPeerSource or WithStaticRelays")
)
type Option func(*config) error
func WithStaticRelays(static []peer.AddrInfo) Option {
return func(c *config) error {
if c.peerSource != nil {
return errAlreadyHavePeerSource
}
WithPeerSource(func(ctx context.Context, numPeers int) <-chan peer.AddrInfo {
if len(static) < numPeers {
numPeers = len(static)
}
c := make(chan peer.AddrInfo, numPeers)
defer close(c)
for i := 0; i < numPeers; i++ {
c <- static[i]
}
return c
})(c)
WithMinCandidates(len(static))(c)
WithMaxCandidates(len(static))(c)
WithNumRelays(len(static))(c)
return nil
}
}
// WithPeerSource defines a callback for AutoRelay to query for more relay candidates.
func WithPeerSource(f PeerSource) Option {
return func(c *config) error {
if c.peerSource != nil {
return errAlreadyHavePeerSource
}
c.peerSource = f
return nil
}
}
// WithNumRelays sets the number of relays we strive to obtain reservations with.
func WithNumRelays(n int) Option {
return func(c *config) error {
c.desiredRelays = n
return nil
}
}
// WithMaxCandidates sets the number of relay candidates that we buffer.
func WithMaxCandidates(n int) Option {
return func(c *config) error {
c.maxCandidates = n
if c.minCandidates > n {
c.minCandidates = n
}
return nil
}
}
// WithMinCandidates sets the minimum number of relay candidates we collect before we try to get a reservation
// with any of them (unless we've been running for longer than the boot delay).
// This is to make sure that we don't just randomly connect to the first candidate that we discover.
func WithMinCandidates(n int) Option {
return func(c *config) error {
if n > c.maxCandidates {
n = c.maxCandidates
}
c.minCandidates = n
c.setMinCandidates = true
return nil
}
}
// WithBootDelay sets the boot delay for finding relays.
// We won't attempt any reservation if we have fewer than the minimum number of candidates.
// This prevents us from connecting to the "first best" relay, and allows us to select relays more carefully.
// However, in case we haven't found enough relays after the boot delay, we use what we have.
func WithBootDelay(d time.Duration) Option {
return func(c *config) error {
c.bootDelay = d
return nil
}
}
// WithBackoff sets the time we wait after failing to obtain a reservation with a candidate.
func WithBackoff(d time.Duration) Option {
return func(c *config) error {
c.backoff = d
return nil
}
}
// WithMaxCandidateAge sets the maximum age of a candidate.
// When we are connected to the desired number of relays, we don't ask the peer source for new candidates.
// This can lead to AutoRelay's candidate list becoming outdated, and means we won't be able
// to quickly establish a new relay connection if our existing connection breaks, if all the candidates
// have become stale.
func WithMaxCandidateAge(d time.Duration) Option {
return func(c *config) error {
c.maxCandidateAge = d
return nil
}
}
// InstantTimer is a timer that triggers at some instant rather than some duration
type InstantTimer interface {
Reset(d time.Time) bool
Stop() bool
Ch() <-chan time.Time
}
// ClockWithInstantTimer is a clock that can create timers that trigger at some
// instant rather than some duration
type ClockWithInstantTimer interface {
Now() time.Time
Since(t time.Time) time.Duration
InstantTimer(when time.Time) InstantTimer
}
type RealTimer struct{ t *time.Timer }
var _ InstantTimer = (*RealTimer)(nil)
func (t RealTimer) Ch() <-chan time.Time {
return t.t.C
}
func (t RealTimer) Reset(d time.Time) bool {
return t.t.Reset(time.Until(d))
}
func (t RealTimer) Stop() bool {
return t.t.Stop()
}
type RealClock struct{}
var _ ClockWithInstantTimer = RealClock{}
func (RealClock) Now() time.Time {
return time.Now()
}
func (RealClock) Since(t time.Time) time.Duration {
return time.Since(t)
}
func (RealClock) InstantTimer(when time.Time) InstantTimer {
t := time.NewTimer(time.Until(when))
return &RealTimer{t}
}
func WithClock(cl ClockWithInstantTimer) Option {
return func(c *config) error {
c.clock = cl
return nil
}
}
// WithMinInterval sets the minimum interval between calls to the peerSource callback:
// even if AutoRelay needs new candidates, the callback will not be called more often than this.
func WithMinInterval(interval time.Duration) Option {
return func(c *config) error {
c.minInterval = interval
return nil
}
}
// WithMetricsTracer configures autorelay to use mt to track metrics
func WithMetricsTracer(mt MetricsTracer) Option {
return func(c *config) error {
c.metricsTracer = mt
return nil
}
}
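
The PeerSource contract documented at the top of this file is easy to get wrong, so here is a sketch of an implementation that respects it: it sends at most numPeers candidates, always closes the channel, and stops early on context cancellation. The static knownRelays slice is a stand-in for whatever discovery mechanism an application actually uses:

package example

import (
	"context"

	"github.com/libp2p/go-libp2p/core/peer"
)

// knownRelays is a stand-in for application-specific relay discovery.
var knownRelays []peer.AddrInfo

// relayPeerSource follows the PeerSource contract: it sends at most numPeers
// candidates, always closes the channel, and stops early if ctx is canceled.
func relayPeerSource(ctx context.Context, numPeers int) <-chan peer.AddrInfo {
	out := make(chan peer.AddrInfo)
	go func() {
		defer close(out) // MUST close, even if the context is canceled
		for i, ai := range knownRelays {
			if i >= numPeers {
				return
			}
			select {
			case out <- ai:
			case <-ctx.Done():
				return
			}
		}
	}()
	return out
}

Such a function can then be handed to AutoRelay via WithPeerSource (or the corresponding go-libp2p constructor option).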


@@ -0,0 +1,17 @@
package autorelay
import (
ma "github.com/multiformats/go-multiaddr"
)
// Filter filters out all relay addresses.
func Filter(addrs []ma.Multiaddr) []ma.Multiaddr {
raddrs := make([]ma.Multiaddr, 0, len(addrs))
for _, addr := range addrs {
if isRelayAddr(addr) {
continue
}
raddrs = append(raddrs, addr)
}
return raddrs
}
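
Filter is typically useful as an address factory, so a host never advertises relay addresses. A sketch, assuming the standard libp2p.AddrsFactory option:

package main

import (
	"log"

	"github.com/libp2p/go-libp2p"
	"github.com/libp2p/go-libp2p/p2p/host/autorelay"
)

func main() {
	// Advertise only non-relay addresses from this host.
	h, err := libp2p.New(libp2p.AddrsFactory(autorelay.Filter))
	if err != nil {
		log.Fatal(err)
	}
	defer h.Close()
	log.Println("advertised addrs:", h.Addrs())
}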


@@ -0,0 +1,810 @@
package autorelay
import (
"context"
"errors"
"fmt"
"math/rand"
"sync"
"time"
"golang.org/x/sync/errgroup"
"github.com/libp2p/go-libp2p/core/event"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/peer"
basic "github.com/libp2p/go-libp2p/p2p/host/basic"
"github.com/libp2p/go-libp2p/p2p/host/eventbus"
circuitv2 "github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/client"
circuitv2_proto "github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/proto"
ma "github.com/multiformats/go-multiaddr"
manet "github.com/multiformats/go-multiaddr/net"
)
const protoIDv2 = circuitv2_proto.ProtoIDv2Hop
// Terminology:
// Candidate: Once we connect to a node and it supports relay protocol,
// we call it a candidate, and consider using it as a relay.
// Relay: Out of the list of candidates, we select a relay to connect to.
// Currently, we just randomly select a candidate, but we can employ more sophisticated
// selection strategies here (e.g. by factoring in the RTT).
const (
rsvpRefreshInterval = time.Minute
rsvpExpirationSlack = 2 * time.Minute
autorelayTag = "autorelay"
)
type candidate struct {
added time.Time
supportsRelayV2 bool
ai peer.AddrInfo
}
// relayFinder discovers relay candidates and maintains relay reservations for a host behind a NAT.
type relayFinder struct {
bootTime time.Time
host *basic.BasicHost
conf *config
refCount sync.WaitGroup
ctxCancel context.CancelFunc
ctxCancelMx sync.Mutex
peerSource PeerSource
candidateFound chan struct{} // receives every time we find a new relay candidate
candidateMx sync.Mutex
candidates map[peer.ID]*candidate
backoff map[peer.ID]time.Time
maybeConnectToRelayTrigger chan struct{} // cap: 1
// Any time _something_ happens that might cause us to need new candidates.
// This could be
// * the disconnection of a relay
// * the failed attempt to obtain a reservation with a current candidate
// * a candidate is deleted due to its age
maybeRequestNewCandidates chan struct{} // cap: 1.
relayUpdated chan struct{}
relayMx sync.Mutex
relays map[peer.ID]*circuitv2.Reservation
cachedAddrs []ma.Multiaddr
cachedAddrsExpiry time.Time
// A channel that triggers a run of `runScheduledWork`.
triggerRunScheduledWork chan struct{}
metricsTracer MetricsTracer
}
var errAlreadyRunning = errors.New("relayFinder already running")
func newRelayFinder(host *basic.BasicHost, peerSource PeerSource, conf *config) *relayFinder {
if peerSource == nil {
panic("Can not create a new relayFinder. Need a Peer Source fn or a list of static relays. Refer to the documentation around `libp2p.EnableAutoRelay`")
}
return &relayFinder{
bootTime: conf.clock.Now(),
host: host,
conf: conf,
peerSource: peerSource,
candidates: make(map[peer.ID]*candidate),
backoff: make(map[peer.ID]time.Time),
candidateFound: make(chan struct{}, 1),
maybeConnectToRelayTrigger: make(chan struct{}, 1),
maybeRequestNewCandidates: make(chan struct{}, 1),
triggerRunScheduledWork: make(chan struct{}, 1),
relays: make(map[peer.ID]*circuitv2.Reservation),
relayUpdated: make(chan struct{}, 1),
metricsTracer: &wrappedMetricsTracer{conf.metricsTracer},
}
}
type scheduledWorkTimes struct {
leastFrequentInterval time.Duration
nextRefresh time.Time
nextBackoff time.Time
nextOldCandidateCheck time.Time
nextAllowedCallToPeerSource time.Time
}
func (rf *relayFinder) background(ctx context.Context) {
peerSourceRateLimiter := make(chan struct{}, 1)
rf.refCount.Add(1)
go func() {
defer rf.refCount.Done()
rf.findNodes(ctx, peerSourceRateLimiter)
}()
rf.refCount.Add(1)
go func() {
defer rf.refCount.Done()
rf.handleNewCandidates(ctx)
}()
subConnectedness, err := rf.host.EventBus().Subscribe(new(event.EvtPeerConnectednessChanged), eventbus.Name("autorelay (relay finder)"))
if err != nil {
log.Error("failed to subscribe to the EvtPeerConnectednessChanged")
return
}
defer subConnectedness.Close()
now := rf.conf.clock.Now()
bootDelayTimer := rf.conf.clock.InstantTimer(now.Add(rf.conf.bootDelay))
defer bootDelayTimer.Stop()
// This is the least frequent event. It's our fallback timer if we don't have any other work to do.
leastFrequentInterval := rf.conf.minInterval
// Check if leastFrequentInterval is 0 to avoid busy looping
if rf.conf.backoff > leastFrequentInterval || leastFrequentInterval == 0 {
leastFrequentInterval = rf.conf.backoff
}
if rf.conf.maxCandidateAge > leastFrequentInterval || leastFrequentInterval == 0 {
leastFrequentInterval = rf.conf.maxCandidateAge
}
if rsvpRefreshInterval > leastFrequentInterval || leastFrequentInterval == 0 {
leastFrequentInterval = rsvpRefreshInterval
}
scheduledWork := &scheduledWorkTimes{
leastFrequentInterval: leastFrequentInterval,
nextRefresh: now.Add(rsvpRefreshInterval),
nextBackoff: now.Add(rf.conf.backoff),
nextOldCandidateCheck: now.Add(rf.conf.maxCandidateAge),
nextAllowedCallToPeerSource: now.Add(-time.Second), // allow immediately
}
workTimer := rf.conf.clock.InstantTimer(rf.runScheduledWork(ctx, now, scheduledWork, peerSourceRateLimiter))
defer workTimer.Stop()
for {
select {
case ev, ok := <-subConnectedness.Out():
if !ok {
return
}
evt := ev.(event.EvtPeerConnectednessChanged)
if evt.Connectedness != network.NotConnected {
continue
}
push := false
rf.relayMx.Lock()
if rf.usingRelay(evt.Peer) { // we were disconnected from a relay
log.Debugw("disconnected from relay", "id", evt.Peer)
delete(rf.relays, evt.Peer)
rf.notifyMaybeConnectToRelay()
rf.notifyMaybeNeedNewCandidates()
push = true
}
rf.relayMx.Unlock()
if push {
rf.clearCachedAddrsAndSignalAddressChange()
rf.metricsTracer.ReservationEnded(1)
}
case <-rf.candidateFound:
rf.notifyMaybeConnectToRelay()
case <-bootDelayTimer.Ch():
rf.notifyMaybeConnectToRelay()
case <-rf.relayUpdated:
rf.clearCachedAddrsAndSignalAddressChange()
case now := <-workTimer.Ch():
// Note: `now` is not guaranteed to be the current time. It's the time
// that the timer was fired. This is okay because we'll schedule
// future work at a specific time.
nextTime := rf.runScheduledWork(ctx, now, scheduledWork, peerSourceRateLimiter)
workTimer.Reset(nextTime)
case <-rf.triggerRunScheduledWork:
// Ignore the next time because we aren't scheduling any future work here
_ = rf.runScheduledWork(ctx, rf.conf.clock.Now(), scheduledWork, peerSourceRateLimiter)
case <-ctx.Done():
return
}
}
}
func (rf *relayFinder) clearCachedAddrsAndSignalAddressChange() {
rf.relayMx.Lock()
rf.cachedAddrs = nil
rf.relayMx.Unlock()
rf.host.SignalAddressChange()
rf.metricsTracer.RelayAddressUpdated()
}
func (rf *relayFinder) runScheduledWork(ctx context.Context, now time.Time, scheduledWork *scheduledWorkTimes, peerSourceRateLimiter chan<- struct{}) time.Time {
nextTime := now.Add(scheduledWork.leastFrequentInterval)
if now.After(scheduledWork.nextRefresh) {
scheduledWork.nextRefresh = now.Add(rsvpRefreshInterval)
if rf.refreshReservations(ctx, now) {
rf.clearCachedAddrsAndSignalAddressChange()
}
}
if now.After(scheduledWork.nextBackoff) {
scheduledWork.nextBackoff = rf.clearBackoff(now)
}
if now.After(scheduledWork.nextOldCandidateCheck) {
scheduledWork.nextOldCandidateCheck = rf.clearOldCandidates(now)
}
if now.After(scheduledWork.nextAllowedCallToPeerSource) {
select {
case peerSourceRateLimiter <- struct{}{}:
scheduledWork.nextAllowedCallToPeerSource = now.Add(rf.conf.minInterval)
if scheduledWork.nextAllowedCallToPeerSource.Before(nextTime) {
nextTime = scheduledWork.nextAllowedCallToPeerSource
}
default:
}
} else {
// We still need to schedule this work if it's sooner than nextTime
if scheduledWork.nextAllowedCallToPeerSource.Before(nextTime) {
nextTime = scheduledWork.nextAllowedCallToPeerSource
}
}
// Find the next time we need to run scheduled work.
if scheduledWork.nextRefresh.Before(nextTime) {
nextTime = scheduledWork.nextRefresh
}
if scheduledWork.nextBackoff.Before(nextTime) {
nextTime = scheduledWork.nextBackoff
}
if scheduledWork.nextOldCandidateCheck.Before(nextTime) {
nextTime = scheduledWork.nextOldCandidateCheck
}
if nextTime == now {
// Only happens in CI with a mock clock
nextTime = nextTime.Add(1) // avoids an infinite loop
}
rf.metricsTracer.ScheduledWorkUpdated(scheduledWork)
return nextTime
}
// clearOldCandidates clears old candidates from the map. Returns the next time
// to run this function.
func (rf *relayFinder) clearOldCandidates(now time.Time) time.Time {
// If we don't have any candidates, we should run this again in rf.conf.maxCandidateAge.
nextTime := now.Add(rf.conf.maxCandidateAge)
var deleted bool
rf.candidateMx.Lock()
defer rf.candidateMx.Unlock()
for id, cand := range rf.candidates {
expiry := cand.added.Add(rf.conf.maxCandidateAge)
if expiry.After(now) {
if expiry.Before(nextTime) {
nextTime = expiry
}
} else {
log.Debugw("deleting candidate due to age", "id", id)
deleted = true
rf.removeCandidate(id)
}
}
if deleted {
rf.notifyMaybeNeedNewCandidates()
}
return nextTime
}
// clearBackoff clears old backoff entries from the map. Returns the next time
// to run this function.
func (rf *relayFinder) clearBackoff(now time.Time) time.Time {
nextTime := now.Add(rf.conf.backoff)
rf.candidateMx.Lock()
defer rf.candidateMx.Unlock()
for id, t := range rf.backoff {
expiry := t.Add(rf.conf.backoff)
if expiry.After(now) {
if expiry.Before(nextTime) {
nextTime = expiry
}
} else {
log.Debugw("removing backoff for node", "id", id)
delete(rf.backoff, id)
}
}
return nextTime
}
// findNodes accepts nodes from the channel and tests if they support relaying.
// It is run on both public and private nodes.
// It garbage collects old entries, so that the set of nodes doesn't overflow.
// This makes sure that as soon as we need to find relay candidates, we have them available.
// peerSourceRateLimiter is used to limit how often we call the peer source.
func (rf *relayFinder) findNodes(ctx context.Context, peerSourceRateLimiter <-chan struct{}) {
var peerChan <-chan peer.AddrInfo
var wg sync.WaitGroup
for {
rf.candidateMx.Lock()
numCandidates := len(rf.candidates)
rf.candidateMx.Unlock()
if peerChan == nil && numCandidates < rf.conf.minCandidates {
rf.metricsTracer.CandidateLoopState(peerSourceRateLimited)
select {
case <-peerSourceRateLimiter:
peerChan = rf.peerSource(ctx, rf.conf.maxCandidates)
select {
case rf.triggerRunScheduledWork <- struct{}{}:
default:
}
case <-ctx.Done():
return
}
}
if peerChan == nil {
rf.metricsTracer.CandidateLoopState(waitingForTrigger)
} else {
rf.metricsTracer.CandidateLoopState(waitingOnPeerChan)
}
select {
case <-rf.maybeRequestNewCandidates:
continue
case pi, ok := <-peerChan:
if !ok {
wg.Wait()
peerChan = nil
continue
}
log.Debugw("found node", "id", pi.ID)
rf.candidateMx.Lock()
numCandidates := len(rf.candidates)
backoffStart, isOnBackoff := rf.backoff[pi.ID]
rf.candidateMx.Unlock()
if isOnBackoff {
log.Debugw("skipping node that we recently failed to obtain a reservation with", "id", pi.ID, "last attempt", rf.conf.clock.Since(backoffStart))
continue
}
if numCandidates >= rf.conf.maxCandidates {
log.Debugw("skipping node. Already have enough candidates", "id", pi.ID, "num", numCandidates, "max", rf.conf.maxCandidates)
continue
}
rf.refCount.Add(1)
wg.Add(1)
go func() {
defer rf.refCount.Done()
defer wg.Done()
if added := rf.handleNewNode(ctx, pi); added {
rf.notifyNewCandidate()
}
}()
case <-ctx.Done():
rf.metricsTracer.CandidateLoopState(stopped)
return
}
}
}
func (rf *relayFinder) notifyMaybeConnectToRelay() {
select {
case rf.maybeConnectToRelayTrigger <- struct{}{}:
default:
}
}
func (rf *relayFinder) notifyMaybeNeedNewCandidates() {
select {
case rf.maybeRequestNewCandidates <- struct{}{}:
default:
}
}
func (rf *relayFinder) notifyNewCandidate() {
select {
case rf.candidateFound <- struct{}{}:
default:
}
}
// handleNewNode tests if a peer supports circuit v2.
// This method is only run on private nodes.
// If a peer does, it is added to the candidates map.
// Note that just supporting the protocol doesn't guarantee that we can also obtain a reservation.
func (rf *relayFinder) handleNewNode(ctx context.Context, pi peer.AddrInfo) (added bool) {
rf.relayMx.Lock()
relayInUse := rf.usingRelay(pi.ID)
rf.relayMx.Unlock()
if relayInUse {
return false
}
ctx, cancel := context.WithTimeout(ctx, 20*time.Second)
defer cancel()
supportsV2, err := rf.tryNode(ctx, pi)
if err != nil {
log.Debugf("node %s not accepted as a candidate: %s", pi.ID, err)
if err == errProtocolNotSupported {
rf.metricsTracer.CandidateChecked(false)
}
return false
}
rf.metricsTracer.CandidateChecked(true)
rf.candidateMx.Lock()
if len(rf.candidates) > rf.conf.maxCandidates {
rf.candidateMx.Unlock()
return false
}
log.Debugw("node supports relay protocol", "peer", pi.ID, "supports circuit v2", supportsV2)
rf.addCandidate(&candidate{
added: rf.conf.clock.Now(),
ai: pi,
supportsRelayV2: supportsV2,
})
rf.candidateMx.Unlock()
return true
}
var errProtocolNotSupported = errors.New("doesn't speak circuit v2")
// tryNode checks if a peer actually supports circuit v2.
// It does not modify any internal state.
func (rf *relayFinder) tryNode(ctx context.Context, pi peer.AddrInfo) (supportsRelayV2 bool, err error) {
if err := rf.host.Connect(ctx, pi); err != nil {
return false, fmt.Errorf("error connecting to relay %s: %w", pi.ID, err)
}
conns := rf.host.Network().ConnsToPeer(pi.ID)
for _, conn := range conns {
if isRelayAddr(conn.RemoteMultiaddr()) {
return false, errors.New("not a public node")
}
}
// wait for identify to complete in at least one conn so that we can check the supported protocols
ready := make(chan struct{}, 1)
for _, conn := range conns {
go func(conn network.Conn) {
select {
case <-rf.host.IDService().IdentifyWait(conn):
select {
case ready <- struct{}{}:
default:
}
case <-ctx.Done():
}
}(conn)
}
select {
case <-ready:
case <-ctx.Done():
return false, ctx.Err()
}
protos, err := rf.host.Peerstore().SupportsProtocols(pi.ID, protoIDv2)
if err != nil {
return false, fmt.Errorf("error checking relay protocol support for peer %s: %w", pi.ID, err)
}
if len(protos) == 0 {
return false, errProtocolNotSupported
}
return true, nil
}
// When a new node that could be a relay is found, we receive a notification on the maybeConnectToRelayTrigger chan.
// This function makes sure that we only run one instance of maybeConnectToRelay at once, and buffers
// exactly one more trigger event to run maybeConnectToRelay.
func (rf *relayFinder) handleNewCandidates(ctx context.Context) {
for {
select {
case <-ctx.Done():
return
case <-rf.maybeConnectToRelayTrigger:
rf.maybeConnectToRelay(ctx)
}
}
}
func (rf *relayFinder) maybeConnectToRelay(ctx context.Context) {
rf.relayMx.Lock()
numRelays := len(rf.relays)
rf.relayMx.Unlock()
// We're already connected to our desired number of relays. Nothing to do here.
if numRelays == rf.conf.desiredRelays {
return
}
rf.candidateMx.Lock()
if len(rf.relays) == 0 && len(rf.candidates) < rf.conf.minCandidates && rf.conf.clock.Since(rf.bootTime) < rf.conf.bootDelay {
// During the startup phase, we don't want to connect to the first candidate that we find.
// Instead, we wait until we've found at least minCandidates, and then select the best of those.
// However, if that takes too long (longer than bootDelay), we still go ahead.
rf.candidateMx.Unlock()
return
}
if len(rf.candidates) == 0 {
rf.candidateMx.Unlock()
return
}
candidates := rf.selectCandidates()
rf.candidateMx.Unlock()
// We now iterate over the candidates, attempting (sequentially) to get reservations with them, until
// we reach the desired number of relays.
for _, cand := range candidates {
id := cand.ai.ID
rf.relayMx.Lock()
usingRelay := rf.usingRelay(id)
rf.relayMx.Unlock()
if usingRelay {
rf.candidateMx.Lock()
rf.removeCandidate(id)
rf.candidateMx.Unlock()
rf.notifyMaybeNeedNewCandidates()
continue
}
rsvp, err := rf.connectToRelay(ctx, cand)
if err != nil {
log.Debugw("failed to connect to relay", "peer", id, "error", err)
rf.notifyMaybeNeedNewCandidates()
rf.metricsTracer.ReservationRequestFinished(false, err)
continue
}
log.Debugw("adding new relay", "id", id)
rf.relayMx.Lock()
rf.relays[id] = rsvp
numRelays := len(rf.relays)
rf.relayMx.Unlock()
rf.notifyMaybeNeedNewCandidates()
rf.host.ConnManager().Protect(id, autorelayTag) // protect the connection
select {
case rf.relayUpdated <- struct{}{}:
default:
}
rf.metricsTracer.ReservationRequestFinished(false, nil)
if numRelays >= rf.conf.desiredRelays {
break
}
}
}
func (rf *relayFinder) connectToRelay(ctx context.Context, cand *candidate) (*circuitv2.Reservation, error) {
id := cand.ai.ID
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
var rsvp *circuitv2.Reservation
// make sure we're still connected.
if rf.host.Network().Connectedness(id) != network.Connected {
if err := rf.host.Connect(ctx, cand.ai); err != nil {
rf.candidateMx.Lock()
rf.removeCandidate(cand.ai.ID)
rf.candidateMx.Unlock()
return nil, fmt.Errorf("failed to connect: %w", err)
}
}
rf.candidateMx.Lock()
rf.backoff[id] = rf.conf.clock.Now()
rf.candidateMx.Unlock()
var err error
if cand.supportsRelayV2 {
rsvp, err = circuitv2.Reserve(ctx, rf.host, cand.ai)
if err != nil {
err = fmt.Errorf("failed to reserve slot: %w", err)
}
}
rf.candidateMx.Lock()
rf.removeCandidate(id)
rf.candidateMx.Unlock()
return rsvp, err
}
func (rf *relayFinder) refreshReservations(ctx context.Context, now time.Time) bool {
rf.relayMx.Lock()
// find reservations about to expire and refresh them in parallel
g := new(errgroup.Group)
for p, rsvp := range rf.relays {
if now.Add(rsvpExpirationSlack).Before(rsvp.Expiration) {
continue
}
p := p
g.Go(func() error {
err := rf.refreshRelayReservation(ctx, p)
rf.metricsTracer.ReservationRequestFinished(true, err)
return err
})
}
rf.relayMx.Unlock()
err := g.Wait()
return err != nil
}
func (rf *relayFinder) refreshRelayReservation(ctx context.Context, p peer.ID) error {
rsvp, err := circuitv2.Reserve(ctx, rf.host, peer.AddrInfo{ID: p})
rf.relayMx.Lock()
if err != nil {
log.Debugw("failed to refresh relay slot reservation", "relay", p, "error", err)
_, exists := rf.relays[p]
delete(rf.relays, p)
// unprotect the connection
rf.host.ConnManager().Unprotect(p, autorelayTag)
rf.relayMx.Unlock()
if exists {
rf.metricsTracer.ReservationEnded(1)
}
return err
}
log.Debugw("refreshed relay slot reservation", "relay", p)
rf.relays[p] = rsvp
rf.relayMx.Unlock()
return nil
}
// usingRelay returns if we're currently using the given relay.
func (rf *relayFinder) usingRelay(p peer.ID) bool {
_, ok := rf.relays[p]
return ok
}
// addCandidate adds a candidate to the candidates set. Assumes the caller holds the candidateMx mutex.
func (rf *relayFinder) addCandidate(cand *candidate) {
_, exists := rf.candidates[cand.ai.ID]
rf.candidates[cand.ai.ID] = cand
if !exists {
rf.metricsTracer.CandidateAdded(1)
}
}
func (rf *relayFinder) removeCandidate(id peer.ID) {
_, exists := rf.candidates[id]
if exists {
delete(rf.candidates, id)
rf.metricsTracer.CandidateRemoved(1)
}
}
// selectCandidates returns an ordered slice of relay candidates.
// Callers should attempt to obtain reservations with the candidates in this order.
func (rf *relayFinder) selectCandidates() []*candidate {
now := rf.conf.clock.Now()
candidates := make([]*candidate, 0, len(rf.candidates))
for _, cand := range rf.candidates {
if cand.added.Add(rf.conf.maxCandidateAge).After(now) {
candidates = append(candidates, cand)
}
}
// TODO: better relay selection strategy; this just selects random relays,
// but we should probably use ping latency as the selection metric
rand.Shuffle(len(candidates), func(i, j int) {
candidates[i], candidates[j] = candidates[j], candidates[i]
})
return candidates
}
// This function computes the NATed relay addrs when our status is private:
// - The public addrs are removed from the address set.
// - The non-public addrs are included verbatim so that peers behind the same NAT/firewall
// can still dial us directly.
// - On top of those, we add the relay-specific addrs for the relays to which we are
// connected. For each non-private relay addr, we encapsulate the p2p-circuit addr
// through which we can be dialed.
func (rf *relayFinder) relayAddrs(addrs []ma.Multiaddr) []ma.Multiaddr {
rf.relayMx.Lock()
defer rf.relayMx.Unlock()
if rf.cachedAddrs != nil && rf.conf.clock.Now().Before(rf.cachedAddrsExpiry) {
return rf.cachedAddrs
}
raddrs := make([]ma.Multiaddr, 0, 4*len(rf.relays)+4)
// only keep private addrs from the original addr set
for _, addr := range addrs {
if manet.IsPrivateAddr(addr) {
raddrs = append(raddrs, addr)
}
}
// add relay specific addrs to the list
relayAddrCnt := 0
for p := range rf.relays {
addrs := cleanupAddressSet(rf.host.Peerstore().Addrs(p))
relayAddrCnt += len(addrs)
circuit := ma.StringCast(fmt.Sprintf("/p2p/%s/p2p-circuit", p))
for _, addr := range addrs {
pub := addr.Encapsulate(circuit)
raddrs = append(raddrs, pub)
}
}
rf.cachedAddrs = raddrs
rf.cachedAddrsExpiry = rf.conf.clock.Now().Add(30 * time.Second)
rf.metricsTracer.RelayAddressCount(relayAddrCnt)
return raddrs
}
func (rf *relayFinder) Start() error {
rf.ctxCancelMx.Lock()
defer rf.ctxCancelMx.Unlock()
if rf.ctxCancel != nil {
return errAlreadyRunning
}
log.Debug("starting relay finder")
rf.initMetrics()
ctx, cancel := context.WithCancel(context.Background())
rf.ctxCancel = cancel
rf.refCount.Add(1)
go func() {
defer rf.refCount.Done()
rf.background(ctx)
}()
return nil
}
func (rf *relayFinder) Stop() error {
rf.ctxCancelMx.Lock()
defer rf.ctxCancelMx.Unlock()
log.Debug("stopping relay finder")
if rf.ctxCancel != nil {
rf.ctxCancel()
}
rf.refCount.Wait()
rf.ctxCancel = nil
rf.resetMetrics()
return nil
}
func (rf *relayFinder) initMetrics() {
rf.metricsTracer.DesiredReservations(rf.conf.desiredRelays)
rf.relayMx.Lock()
rf.metricsTracer.ReservationOpened(len(rf.relays))
rf.relayMx.Unlock()
rf.candidateMx.Lock()
rf.metricsTracer.CandidateAdded(len(rf.candidates))
rf.candidateMx.Unlock()
}
func (rf *relayFinder) resetMetrics() {
rf.relayMx.Lock()
rf.metricsTracer.ReservationEnded(len(rf.relays))
rf.relayMx.Unlock()
rf.candidateMx.Lock()
rf.metricsTracer.CandidateRemoved(len(rf.candidates))
rf.candidateMx.Unlock()
rf.metricsTracer.RelayAddressCount(0)
rf.metricsTracer.ScheduledWorkUpdated(&scheduledWorkTimes{})
}
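
To illustrate what relayAddrs produces, the snippet below performs the same encapsulation as the loop above, using a throwaway key to stand in for a relay's peer ID and a made-up public address:

package main

import (
	"crypto/rand"
	"fmt"

	"github.com/libp2p/go-libp2p/core/crypto"
	"github.com/libp2p/go-libp2p/core/peer"
	ma "github.com/multiformats/go-multiaddr"
)

func main() {
	// A throwaway peer ID standing in for a relay we hold a reservation with.
	priv, _, err := crypto.GenerateEd25519Key(rand.Reader)
	if err != nil {
		panic(err)
	}
	relayID, err := peer.IDFromPrivateKey(priv)
	if err != nil {
		panic(err)
	}

	// A made-up public address of the relay.
	relayAddr := ma.StringCast("/ip4/1.2.3.4/tcp/4001")

	// Same construction as relayFinder.relayAddrs: we advertise ourselves as
	// reachable through the relay's circuit.
	circuit := ma.StringCast(fmt.Sprintf("/p2p/%s/p2p-circuit", relayID))
	fmt.Println(relayAddr.Encapsulate(circuit))
	// Prints something like /ip4/1.2.3.4/tcp/4001/p2p/<relay-id>/p2p-circuit
}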