Integrate BACKBEAT SDK and resolve KACHING license validation

Major integrations and fixes:
- Added BACKBEAT SDK integration for P2P operation timing
- Implemented beat-aware status tracking for distributed operations
- Added Docker secrets support for secure license management
- Resolved KACHING license validation via HTTPS/TLS
- Updated docker-compose configuration for clean stack deployment
- Disabled rollback policies to prevent deployment failures
- Added license credential storage (CHORUS-DEV-MULTI-001)

Technical improvements:
- BACKBEAT P2P operation tracking with phase management
- Enhanced configuration system with file-based secrets (see the sketch below)
- Improved error handling for license validation
- Clean separation of KACHING and CHORUS deployment stacks
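
As a rough illustration of the file-based secrets approach (not the actual CHORUS implementation; the secret path and variable names below are hypothetical placeholders), a config loader can prefer a Docker secret file and fall back to a plain environment variable:

package config

import (
	"os"
	"strings"
)

// loadLicenseKey returns the KACHING license key. It prefers a Docker secret
// mounted as a file (pointed to by KACHING_LICENSE_FILE) and falls back to the
// KACHING_LICENSE environment variable. Both names are illustrative only.
func loadLicenseKey() (string, error) {
	if path := os.Getenv("KACHING_LICENSE_FILE"); path != "" {
		b, err := os.ReadFile(path)
		if err != nil {
			return "", err
		}
		return strings.TrimSpace(string(b)), nil
	}
	return os.Getenv("KACHING_LICENSE"), nil
}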

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Author: anthonyrawlins
Date: 2025-09-06 07:56:26 +10:00
Commit: 9bdcbe0447 (parent 543ab216f9)
4730 changed files with 1480093 additions and 1916 deletions


@@ -0,0 +1,279 @@
package swarm
import (
"fmt"
"sync"
ma "github.com/multiformats/go-multiaddr"
manet "github.com/multiformats/go-multiaddr/net"
)
type blackHoleState int
const (
blackHoleStateProbing blackHoleState = iota
blackHoleStateAllowed
blackHoleStateBlocked
)
func (st blackHoleState) String() string {
switch st {
case blackHoleStateProbing:
return "Probing"
case blackHoleStateAllowed:
return "Allowed"
case blackHoleStateBlocked:
return "Blocked"
default:
return fmt.Sprintf("Unknown %d", st)
}
}
type blackHoleResult int
const (
blackHoleResultAllowed blackHoleResult = iota
blackHoleResultProbing
blackHoleResultBlocked
)
// blackHoleFilter provides black hole filtering for dials. This filter should be used in
// concert with a UDP or IPv6 address filter to detect a UDP or IPv6 black hole. In a black
// holed environment, dial requests are blocked and only periodic probes to check the
// state of the black hole are allowed.
//
// Requests are blocked if the number of successes in the last n dials is less than
// minSuccesses. If a request succeeds in Blocked state, the filter state is reset and n
// subsequent requests are allowed before reevaluating black hole state. Dials cancelled
// when some other concurrent dial succeeded are counted as failures. A sufficiently large
// n prevents false negatives in such cases.
type blackHoleFilter struct {
// n serves the dual purpose of being the minimum number of requests after which we
// probe the state of the black hole in blocked state and the minimum number of
// completed dials required before evaluating black hole state.
n int
// minSuccesses is the minimum number of successes required in the last n dials
// to consider we are not blocked.
minSuccesses int
// name for the detector.
name string
// requests counts the number of dial requests to peers. We handle requests at a peer
// level and record results at individual address dial level.
requests int
// dialResults of the last `n` dials. A successful dial is true.
dialResults []bool
// successes is the count of successful dials in dialResults
successes int
// state is the current state of the detector
state blackHoleState
mu sync.Mutex
metricsTracer MetricsTracer
}
// RecordResult records the outcome of a dial. A successful dial will change the state
// of the filter to Allowed. A failed dial only blocks subsequent requests if fewer than
// minSuccesses of the last n dials have succeeded.
func (b *blackHoleFilter) RecordResult(success bool) {
b.mu.Lock()
defer b.mu.Unlock()
if b.state == blackHoleStateBlocked && success {
// If the call succeeds in a blocked state we reset to allowed.
// This is better than slowly accumulating successes till we cross the minSuccesses
// threshold, since a black hole is a binary property.
b.reset()
return
}
if success {
b.successes++
}
b.dialResults = append(b.dialResults, success)
if len(b.dialResults) > b.n {
if b.dialResults[0] {
b.successes--
}
b.dialResults = b.dialResults[1:]
}
b.updateState()
b.trackMetrics()
}
// HandleRequest returns the result of applying the black hole filter for the request.
func (b *blackHoleFilter) HandleRequest() blackHoleResult {
b.mu.Lock()
defer b.mu.Unlock()
b.requests++
b.trackMetrics()
if b.state == blackHoleStateAllowed {
return blackHoleResultAllowed
} else if b.state == blackHoleStateProbing || b.requests%b.n == 0 {
return blackHoleResultProbing
} else {
return blackHoleResultBlocked
}
}
func (b *blackHoleFilter) reset() {
b.successes = 0
b.dialResults = b.dialResults[:0]
b.requests = 0
b.updateState()
}
func (b *blackHoleFilter) updateState() {
st := b.state
if len(b.dialResults) < b.n {
b.state = blackHoleStateProbing
} else if b.successes >= b.minSuccesses {
b.state = blackHoleStateAllowed
} else {
b.state = blackHoleStateBlocked
}
if st != b.state {
log.Debugf("%s blackHoleDetector state changed from %s to %s", b.name, st, b.state)
}
}
func (b *blackHoleFilter) trackMetrics() {
if b.metricsTracer == nil {
return
}
nextRequestAllowedAfter := 0
if b.state == blackHoleStateBlocked {
nextRequestAllowedAfter = b.n - (b.requests % b.n)
}
successFraction := 0.0
if len(b.dialResults) > 0 {
successFraction = float64(b.successes) / float64(len(b.dialResults))
}
b.metricsTracer.UpdatedBlackHoleFilterState(
b.name,
b.state,
nextRequestAllowedAfter,
successFraction,
)
}
// blackHoleDetector provides UDP and IPv6 black hole detection using a `blackHoleFilter`
// for each. For details of the black hole detection logic see `blackHoleFilter`.
//
// Black hole filtering is done at a peer dial level to ensure that periodic probes to
// detect change of the black hole state are actually dialed and are not skipped
// because of dial prioritisation logic.
type blackHoleDetector struct {
udp, ipv6 *blackHoleFilter
}
// FilterAddrs filters the peer's addresses removing black holed addresses
func (d *blackHoleDetector) FilterAddrs(addrs []ma.Multiaddr) (valid []ma.Multiaddr, blackHoled []ma.Multiaddr) {
hasUDP, hasIPv6 := false, false
for _, a := range addrs {
if !manet.IsPublicAddr(a) {
continue
}
if isProtocolAddr(a, ma.P_UDP) {
hasUDP = true
}
if isProtocolAddr(a, ma.P_IP6) {
hasIPv6 = true
}
}
udpRes := blackHoleResultAllowed
if d.udp != nil && hasUDP {
udpRes = d.udp.HandleRequest()
}
ipv6Res := blackHoleResultAllowed
if d.ipv6 != nil && hasIPv6 {
ipv6Res = d.ipv6.HandleRequest()
}
blackHoled = make([]ma.Multiaddr, 0, len(addrs))
return ma.FilterAddrs(
addrs,
func(a ma.Multiaddr) bool {
if !manet.IsPublicAddr(a) {
return true
}
// allow all UDP addresses while probing irrespective of IPv6 black hole state
if udpRes == blackHoleResultProbing && isProtocolAddr(a, ma.P_UDP) {
return true
}
// allow all IPv6 addresses while probing irrespective of UDP black hole state
if ipv6Res == blackHoleResultProbing && isProtocolAddr(a, ma.P_IP6) {
return true
}
if udpRes == blackHoleResultBlocked && isProtocolAddr(a, ma.P_UDP) {
blackHoled = append(blackHoled, a)
return false
}
if ipv6Res == blackHoleResultBlocked && isProtocolAddr(a, ma.P_IP6) {
blackHoled = append(blackHoled, a)
return false
}
return true
},
), blackHoled
}
// RecordResult updates the state of the relevant `blackHoleFilter`s for addr
func (d *blackHoleDetector) RecordResult(addr ma.Multiaddr, success bool) {
if !manet.IsPublicAddr(addr) {
return
}
if d.udp != nil && isProtocolAddr(addr, ma.P_UDP) {
d.udp.RecordResult(success)
}
if d.ipv6 != nil && isProtocolAddr(addr, ma.P_IP6) {
d.ipv6.RecordResult(success)
}
}
// blackHoleConfig is the config used for black hole detection
type blackHoleConfig struct {
// Enabled enables black hole detection
Enabled bool
// N is the size of the sliding window used to evaluate black hole state
N int
// MinSuccesses is the minimum number of successes out of N required to not
// block requests
MinSuccesses int
}
func newBlackHoleDetector(udpConfig, ipv6Config blackHoleConfig, mt MetricsTracer) *blackHoleDetector {
d := &blackHoleDetector{}
if udpConfig.Enabled {
d.udp = &blackHoleFilter{
n: udpConfig.N,
minSuccesses: udpConfig.MinSuccesses,
name: "UDP",
metricsTracer: mt,
}
}
if ipv6Config.Enabled {
d.ipv6 = &blackHoleFilter{
n: ipv6Config.N,
minSuccesses: ipv6Config.MinSuccesses,
name: "IPv6",
metricsTracer: mt,
}
}
return d
}
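
A minimal, test-style sketch of the filter's behaviour (package-internal, since blackHoleFilter is unexported; the values chosen here are illustrative): with n=10 and minSuccesses=2, ten straight failures move the filter to Blocked, blocked requests are only let through as probes on every n-th request, and a single success resets the filter to Probing.

package swarm

import "fmt"

// exampleBlackHoleFilter is an illustrative sketch, not part of the library.
func exampleBlackHoleFilter() {
	f := &blackHoleFilter{n: 10, minSuccesses: 2, name: "UDP"}
	for i := 0; i < 10; i++ {
		f.RecordResult(false) // ten consecutive failures fill the sliding window
	}
	fmt.Println(f.state)                                     // Blocked
	fmt.Println(f.HandleRequest() == blackHoleResultBlocked) // true: not an n-th request, so no probe
	f.RecordResult(true)                                     // a single success resets the filter
	fmt.Println(f.state)                                     // Probing
}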


@@ -0,0 +1,49 @@
package swarm
import "time"
// InstantTimer is a timer that triggers at some instant rather than some duration
type InstantTimer interface {
Reset(d time.Time) bool
Stop() bool
Ch() <-chan time.Time
}
// Clock is a clock that can create timers that trigger at some
// instant rather than some duration
type Clock interface {
Now() time.Time
Since(t time.Time) time.Duration
InstantTimer(when time.Time) InstantTimer
}
type RealTimer struct{ t *time.Timer }
var _ InstantTimer = (*RealTimer)(nil)
func (t RealTimer) Ch() <-chan time.Time {
return t.t.C
}
func (t RealTimer) Reset(d time.Time) bool {
return t.t.Reset(time.Until(d))
}
func (t RealTimer) Stop() bool {
return t.t.Stop()
}
type RealClock struct{}
var _ Clock = RealClock{}
func (RealClock) Now() time.Time {
return time.Now()
}
func (RealClock) Since(t time.Time) time.Duration {
return time.Since(t)
}
func (RealClock) InstantTimer(when time.Time) InstantTimer {
t := time.NewTimer(time.Until(when))
return &RealTimer{t}
}
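
The Clock/InstantTimer indirection exists so the dial worker's scheduling can be driven by a fake clock in tests; RealClock and RealTimer simply wrap the standard library. A short illustrative sketch (the function name is not part of the library):

package swarm

import (
	"fmt"
	"time"
)

// exampleInstantTimer fires a timer at an absolute instant using RealClock.
func exampleInstantTimer() {
	var cl Clock = RealClock{}
	deadline := cl.Now().Add(50 * time.Millisecond)
	t := cl.InstantTimer(deadline)
	defer t.Stop()
	<-t.Ch() // blocks until the deadline has passed
	fmt.Println("fired, overshoot:", cl.Since(deadline))
}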


@@ -0,0 +1,82 @@
package swarm
import (
"fmt"
"os"
"strings"
"github.com/libp2p/go-libp2p/core/peer"
ma "github.com/multiformats/go-multiaddr"
)
// maxDialDialErrors is the maximum number of dial errors we record
const maxDialDialErrors = 16
// DialError is the error type returned when dialing.
type DialError struct {
Peer peer.ID
DialErrors []TransportError
Cause error
Skipped int
}
func (e *DialError) Timeout() bool {
return os.IsTimeout(e.Cause)
}
func (e *DialError) recordErr(addr ma.Multiaddr, err error) {
if len(e.DialErrors) >= maxDialDialErrors {
e.Skipped++
return
}
e.DialErrors = append(e.DialErrors, TransportError{Address: addr, Cause: err})
}
func (e *DialError) Error() string {
var builder strings.Builder
fmt.Fprintf(&builder, "failed to dial %s:", e.Peer)
if e.Cause != nil {
fmt.Fprintf(&builder, " %s", e.Cause)
}
for _, te := range e.DialErrors {
fmt.Fprintf(&builder, "\n * [%s] %s", te.Address, te.Cause)
}
if e.Skipped > 0 {
fmt.Fprintf(&builder, "\n ... skipping %d errors ...", e.Skipped)
}
return builder.String()
}
func (e *DialError) Unwrap() []error {
if e == nil {
return nil
}
errs := make([]error, 0, len(e.DialErrors)+1)
if e.Cause != nil {
errs = append(errs, e.Cause)
}
for i := 0; i < len(e.DialErrors); i++ {
errs = append(errs, &e.DialErrors[i])
}
return errs
}
var _ error = (*DialError)(nil)
// TransportError is the error returned when dialing a specific address.
type TransportError struct {
Address ma.Multiaddr
Cause error
}
func (e *TransportError) Error() string {
return fmt.Sprintf("failed to dial %s: %s", e.Address, e.Cause)
}
func (e *TransportError) Unwrap() error {
return e.Cause
}
var _ error = (*TransportError)(nil)
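
Because DialError implements the multi-error Unwrap() []error form (and TransportError the single-error form), callers on Go 1.20+ can walk the per-address failures with errors.As/errors.Is. An illustrative, package-internal sketch:

package swarm

import (
	"errors"
	"fmt"

	ma "github.com/multiformats/go-multiaddr"
)

// exampleDialError builds a DialError by hand and inspects it; illustrative only.
func exampleDialError() {
	addr := ma.StringCast("/ip4/203.0.113.1/tcp/4001")
	de := &DialError{}
	de.recordErr(addr, errors.New("connection refused"))

	var te *TransportError
	if errors.As(de, &te) {
		fmt.Println("failed address:", te.Address)
	}
	fmt.Println(de.Error())
}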


@@ -0,0 +1,262 @@
package swarm
import (
"sort"
"strconv"
"time"
"github.com/libp2p/go-libp2p/core/network"
ma "github.com/multiformats/go-multiaddr"
manet "github.com/multiformats/go-multiaddr/net"
)
// The 250ms value is from happy eyeballs RFC 8305. This is a rough estimate of 1 RTT
const (
// duration by which TCP dials are delayed relative to the last QUIC dial
PublicTCPDelay = 250 * time.Millisecond
PrivateTCPDelay = 30 * time.Millisecond
// duration by which QUIC dials are delayed relative to previous QUIC dial
PublicQUICDelay = 250 * time.Millisecond
PrivateQUICDelay = 30 * time.Millisecond
// RelayDelay is the duration by which relay dials are delayed relative to direct addresses
RelayDelay = 500 * time.Millisecond
)
// NoDelayDialRanker ranks addresses with no delay. This is useful for simultaneous connect requests.
func NoDelayDialRanker(addrs []ma.Multiaddr) []network.AddrDelay {
return getAddrDelay(addrs, 0, 0, 0)
}
// DefaultDialRanker determines the ranking of outgoing connection attempts.
//
// Addresses are grouped into three distinct groups:
//
// - private addresses (localhost and local networks (RFC 1918))
// - public addresses
// - relay addresses
//
// Within each group, the addresses are ranked according to the ranking logic described below.
// We then dial addresses according to this ranking, with short timeouts applied between dial attempts.
// This ranking logic dramatically reduces the number of simultaneous dial attempts, while introducing
// no additional latency in the vast majority of cases.
//
// Private and public address groups are dialed in parallel.
// Dialing relay addresses is delayed by 500 ms, if we have any non-relay alternatives.
//
// Within each group (private, public, relay addresses) we apply the following ranking logic:
//
// 1. If both IPv6 QUIC and IPv4 QUIC addresses are present, we do a Happy Eyeballs RFC 8305 style ranking.
// First dial the IPv6 QUIC address with the lowest port. After this we dial the IPv4 QUIC address with
// the lowest port delayed by 250ms (PublicQUICDelay) for public addresses, and 30ms (PrivateQUICDelay)
// for local addresses. After this we dial all the rest of the addresses delayed by 250ms (PublicQUICDelay)
// for public addresses, and 30ms (PrivateQUICDelay) for local addresses.
// 2. If only one of QUIC IPv6 or QUIC IPv4 addresses are present, dial the QUIC address with the lowest port
// first. After this we dial the rest of the QUIC addresses delayed by 250ms (PublicQUICDelay) for public
// addresses, and 30ms (PrivateQUICDelay) for local addresses.
// 3. If a QUIC or WebTransport address is present, TCP addresses dials are delayed relative to the last QUIC dial:
// We prefer to end up with a QUIC connection. For public addresses, the delay introduced is 250ms (PublicTCPDelay),
// and for private addresses 30ms (PrivateTCPDelay).
// 4. For the TCP addresses we follow a strategy similar to QUIC with an optimisation for handling the long TCP
// handshake time described in 6. If both IPv6 TCP and IPv4 TCP addresses are present, we do a Happy Eyeballs
// style ranking. First dial the IPv6 TCP address with the lowest port. After this, dial the IPv4 TCP address
// with the lowest port delayed by 250ms (PublicTCPDelay) for public addresses, and 30ms (PrivateTCPDelay)
// for local addresses. After this we dial all the rest of the addresses delayed by 250ms (PublicTCPDelay) for
// public addresses, and 30ms (PrivateTCPDelay) for local addresses.
// 5. If only one of TCP IPv6 or TCP IPv4 addresses are present, dial the TCP address with the lowest port
// first. After this we dial the rest of the TCP addresses delayed by 250ms (PublicTCPDelay) for public
// addresses, and 30ms (PrivateTCPDelay) for local addresses.
// 6. When a TCP socket is connected and awaiting security and muxer upgrade, we stop new dials for 2*PrivateTCPDelay
// to allow for the upgrade to complete.
//
// We dial lowest ports first as they are more likely to be the listen port.
func DefaultDialRanker(addrs []ma.Multiaddr) []network.AddrDelay {
relay, addrs := filterAddrs(addrs, isRelayAddr)
pvt, addrs := filterAddrs(addrs, manet.IsPrivateAddr)
public, addrs := filterAddrs(addrs, func(a ma.Multiaddr) bool { return isProtocolAddr(a, ma.P_IP4) || isProtocolAddr(a, ma.P_IP6) })
var relayOffset time.Duration
if len(public) > 0 {
// if there is a public direct address available delay relay dials
relayOffset = RelayDelay
}
res := make([]network.AddrDelay, 0, len(addrs))
for i := 0; i < len(addrs); i++ {
res = append(res, network.AddrDelay{Addr: addrs[i], Delay: 0})
}
res = append(res, getAddrDelay(pvt, PrivateTCPDelay, PrivateQUICDelay, 0)...)
res = append(res, getAddrDelay(public, PublicTCPDelay, PublicQUICDelay, 0)...)
res = append(res, getAddrDelay(relay, PublicTCPDelay, PublicQUICDelay, relayOffset)...)
return res
}
// getAddrDelay ranks a group of addresses according to the ranking logic explained in
// the documentation for DefaultDialRanker.
// offset is used to delay all addresses by a fixed duration. This is useful for delaying all relay
// addresses relative to direct addresses.
func getAddrDelay(addrs []ma.Multiaddr, tcpDelay time.Duration, quicDelay time.Duration,
offset time.Duration) []network.AddrDelay {
if len(addrs) == 0 {
return nil
}
sort.Slice(addrs, func(i, j int) bool { return score(addrs[i]) < score(addrs[j]) })
// addrs is now sorted by (Transport, IPVersion). Reorder addrs for happy eyeballs dialing.
// For QUIC and TCP, if we have both IPv6 and IPv4 addresses, move the
// highest priority IPv4 address to the second position.
happyEyeballsQUIC := false
happyEyeballsTCP := false
// tcpStartIdx is the index of the first TCP Address
var tcpStartIdx int
{
i := 0
// If the first QUIC address is IPv6 move the first QUIC IPv4 address to second position
if isQUICAddr(addrs[0]) && isProtocolAddr(addrs[0], ma.P_IP6) {
for j := 1; j < len(addrs); j++ {
if isQUICAddr(addrs[j]) && isProtocolAddr(addrs[j], ma.P_IP4) {
// The first IPv4 address is at position j
// Move the jth element at position 1 shifting the affected elements
if j > 1 {
a := addrs[j]
copy(addrs[2:], addrs[1:j])
addrs[1] = a
}
happyEyeballsQUIC = true
i = j + 1
break
}
}
}
for tcpStartIdx = i; tcpStartIdx < len(addrs); tcpStartIdx++ {
if isProtocolAddr(addrs[tcpStartIdx], ma.P_TCP) {
break
}
}
// If the first TCP address is IPv6 move the first TCP IPv4 address to second position
if tcpStartIdx < len(addrs) && isProtocolAddr(addrs[tcpStartIdx], ma.P_IP6) {
for j := tcpStartIdx + 1; j < len(addrs); j++ {
if isProtocolAddr(addrs[j], ma.P_TCP) && isProtocolAddr(addrs[j], ma.P_IP4) {
// First TCP IPv4 address is at position j, move it to position tcpStartIdx+1
// which is the second priority TCP address
if j > tcpStartIdx+1 {
a := addrs[j]
copy(addrs[tcpStartIdx+2:], addrs[tcpStartIdx+1:j])
addrs[tcpStartIdx+1] = a
}
happyEyeballsTCP = true
break
}
}
}
}
res := make([]network.AddrDelay, 0, len(addrs))
var tcpFirstDialDelay time.Duration
for i, addr := range addrs {
var delay time.Duration
switch {
case isQUICAddr(addr):
// We dial an IPv6 address, then after quicDelay an IPv4
// address, then after a further quicDelay we dial the rest of the addresses.
if i == 1 {
delay = quicDelay
}
if i > 1 {
// If we have happy eyeballs for QUIC, dials after the second position
// will be delayed by 2*quicDelay
if happyEyeballsQUIC {
delay = 2 * quicDelay
} else {
delay = quicDelay
}
}
tcpFirstDialDelay = delay + tcpDelay
case isProtocolAddr(addr, ma.P_TCP):
// We dial an IPv6 address, then after tcpDelay an IPv4
// address, then after a further tcpDelay we dial the rest of the addresses.
if i == tcpStartIdx+1 {
delay = tcpDelay
}
if i > tcpStartIdx+1 {
// If we have happy eyeballs for TCP, dials after the second position
// will be delayed by 2*tcpDelay
if happyEyeballsTCP {
delay = 2 * tcpDelay
} else {
delay = tcpDelay
}
}
delay += tcpFirstDialDelay
}
res = append(res, network.AddrDelay{Addr: addr, Delay: offset + delay})
}
return res
}
// score scores a multiaddress for dialing delay. Lower is better.
// The lower 16 bits of the result are the port. Low ports are ranked higher because they're
// more likely to be listen addresses.
// The addresses are ranked as:
// QUICv1 IPv6 > QUICdraft29 IPv6 > QUICv1 IPv4 > QUICdraft29 IPv4 >
// WebTransport IPv6 > WebTransport IPv4 > TCP IPv6 > TCP IPv4
func score(a ma.Multiaddr) int {
ip4Weight := 0
if isProtocolAddr(a, ma.P_IP4) {
ip4Weight = 1 << 18
}
if _, err := a.ValueForProtocol(ma.P_WEBTRANSPORT); err == nil {
p, _ := a.ValueForProtocol(ma.P_UDP)
pi, _ := strconv.Atoi(p)
return ip4Weight + (1 << 19) + pi
}
if _, err := a.ValueForProtocol(ma.P_QUIC); err == nil {
p, _ := a.ValueForProtocol(ma.P_UDP)
pi, _ := strconv.Atoi(p)
return ip4Weight + pi + (1 << 17)
}
if _, err := a.ValueForProtocol(ma.P_QUIC_V1); err == nil {
p, _ := a.ValueForProtocol(ma.P_UDP)
pi, _ := strconv.Atoi(p)
return ip4Weight + pi
}
if p, err := a.ValueForProtocol(ma.P_TCP); err == nil {
pi, _ := strconv.Atoi(p)
return ip4Weight + pi + (1 << 20)
}
return (1 << 30)
}
func isProtocolAddr(a ma.Multiaddr, p int) bool {
found := false
ma.ForEach(a, func(c ma.Component) bool {
if c.Protocol().Code == p {
found = true
return false
}
return true
})
return found
}
func isQUICAddr(a ma.Multiaddr) bool {
return isProtocolAddr(a, ma.P_QUIC) || isProtocolAddr(a, ma.P_QUIC_V1)
}
// filterAddrs filters an address slice in place
func filterAddrs(addrs []ma.Multiaddr, f func(a ma.Multiaddr) bool) (filtered, rest []ma.Multiaddr) {
j := 0
for i := 0; i < len(addrs); i++ {
if f(addrs[i]) {
addrs[i], addrs[j] = addrs[j], addrs[i]
j++
}
}
return addrs[:j], addrs[j:]
}
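
A small sketch of the ranker's output, assuming the package lives at its upstream go-libp2p import path: given a mix of QUIC and TCP addresses, DefaultDialRanker returns each address paired with a dial delay, with the IPv6 QUIC address first, the IPv4 QUIC address 250 ms later, and TCP a further 250 ms after that.

package main

import (
	"fmt"

	"github.com/libp2p/go-libp2p/p2p/net/swarm"
	ma "github.com/multiformats/go-multiaddr"
)

func main() {
	addrs := []ma.Multiaddr{
		ma.StringCast("/ip4/203.0.113.5/tcp/4001"),
		ma.StringCast("/ip4/203.0.113.5/udp/4001/quic-v1"),
		ma.StringCast("/ip6/2001:db8::1/udp/4001/quic-v1"),
	}
	for _, ad := range swarm.DefaultDialRanker(addrs) {
		fmt.Printf("%-45s after %v\n", ad.Addr, ad.Delay)
	}
}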


@@ -0,0 +1,115 @@
package swarm
import (
"context"
"errors"
"sync"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/peer"
)
// dialWorkerFunc is used by dialSync to spawn a new dial worker
type dialWorkerFunc func(peer.ID, <-chan dialRequest)
// errConcurrentDialSuccessful is used to signal that a concurrent dial succeeded
var errConcurrentDialSuccessful = errors.New("concurrent dial successful")
// newDialSync constructs a new dialSync
func newDialSync(worker dialWorkerFunc) *dialSync {
return &dialSync{
dials: make(map[peer.ID]*activeDial),
dialWorker: worker,
}
}
// dialSync is a dial synchronization helper that ensures that at most one dial
// to any given peer is active at any given time.
type dialSync struct {
mutex sync.Mutex
dials map[peer.ID]*activeDial
dialWorker dialWorkerFunc
}
type activeDial struct {
refCnt int
ctx context.Context
cancelCause func(error)
reqch chan dialRequest
}
func (ad *activeDial) dial(ctx context.Context) (*Conn, error) {
dialCtx := ad.ctx
if forceDirect, reason := network.GetForceDirectDial(ctx); forceDirect {
dialCtx = network.WithForceDirectDial(dialCtx, reason)
}
if simConnect, isClient, reason := network.GetSimultaneousConnect(ctx); simConnect {
dialCtx = network.WithSimultaneousConnect(dialCtx, isClient, reason)
}
resch := make(chan dialResponse, 1)
select {
case ad.reqch <- dialRequest{ctx: dialCtx, resch: resch}:
case <-ctx.Done():
return nil, ctx.Err()
}
select {
case res := <-resch:
return res.conn, res.err
case <-ctx.Done():
return nil, ctx.Err()
}
}
func (ds *dialSync) getActiveDial(p peer.ID) (*activeDial, error) {
ds.mutex.Lock()
defer ds.mutex.Unlock()
actd, ok := ds.dials[p]
if !ok {
// This code intentionally uses the background context. Otherwise, if the first call
// to Dial is canceled, subsequent dial calls will also be canceled.
ctx, cancel := context.WithCancelCause(context.Background())
actd = &activeDial{
ctx: ctx,
cancelCause: cancel,
reqch: make(chan dialRequest),
}
go ds.dialWorker(p, actd.reqch)
ds.dials[p] = actd
}
// increase ref count before dropping mutex
actd.refCnt++
return actd, nil
}
// Dial initiates a dial to the given peer if there is none in progress, and
// then waits for that dial to complete.
func (ds *dialSync) Dial(ctx context.Context, p peer.ID) (*Conn, error) {
ad, err := ds.getActiveDial(p)
if err != nil {
return nil, err
}
conn, err := ad.dial(ctx)
ds.mutex.Lock()
defer ds.mutex.Unlock()
ad.refCnt--
if ad.refCnt == 0 {
if err == nil {
ad.cancelCause(errConcurrentDialSuccessful)
} else {
ad.cancelCause(err)
}
close(ad.reqch)
delete(ds.dials, p)
}
return conn, err
}
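
In effect, dialSync deduplicates concurrent Dial calls: the first caller spawns the worker, later callers attach to the same activeDial, and the worker's request channel is closed once the last caller returns. A package-internal sketch with a stub worker in place of dialWorker.loop (illustrative only):

package swarm

import (
	"context"
	"fmt"
	"sync"

	"github.com/libp2p/go-libp2p/core/peer"
)

// exampleDialSync shows two concurrent Dial calls sharing one worker; the stub
// worker fails every request. Illustrative, not part of the library.
func exampleDialSync() {
	ds := newDialSync(func(p peer.ID, reqch <-chan dialRequest) {
		for req := range reqch {
			req.resch <- dialResponse{err: ErrAllDialsFailed}
		}
	})
	var wg sync.WaitGroup
	for i := 0; i < 2; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			_, err := ds.Dial(context.Background(), peer.ID("example-peer"))
			fmt.Println("dial result:", err)
		}()
	}
	wg.Wait()
}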


@@ -0,0 +1,493 @@
package swarm
import (
"context"
"math"
"sync"
"time"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/peer"
tpt "github.com/libp2p/go-libp2p/core/transport"
ma "github.com/multiformats/go-multiaddr"
manet "github.com/multiformats/go-multiaddr/net"
)
// dialRequest is the structure used to request dials to the peer associated with a
// worker loop
type dialRequest struct {
// ctx is the context that may be used for the request
// if another concurrent request is made, any of the concurrent request's ctx may be used for
// dials to the peer's addresses
// ctx for simultaneous connect requests have higher priority than normal requests
ctx context.Context
// resch is the channel used to send the response for this query
resch chan dialResponse
}
// dialResponse is the response sent to dialRequests on the request's resch channel
type dialResponse struct {
// conn is the connection to the peer on success
conn *Conn
// err is the error in dialing the peer
// nil on connection success
err error
}
// pendRequest is used to track progress on a dialRequest.
type pendRequest struct {
// req is the original dialRequest
req dialRequest
// err comprises errors of all failed dials
err *DialError
// addrs are the addresses on which we are waiting for pending dials
// At the time of creation addrs is initialised to all the addresses of the peer. On a failed dial,
// the addr is removed from the map and err is updated. On a successful dial, the dialRequest is
// completed and response is sent with the connection
addrs map[string]struct{}
}
// addrDial tracks dials to a particular multiaddress.
type addrDial struct {
// addr is the address dialed
addr ma.Multiaddr
// ctx is the context used for dialing the address
ctx context.Context
// conn is the established connection on success
conn *Conn
// err is the err on dialing the address
err error
// dialed indicates whether we have triggered the dial to the address
dialed bool
// createdAt is the time this struct was created
createdAt time.Time
// dialRankingDelay is the delay in dialing this address introduced by the ranking logic
dialRankingDelay time.Duration
// expectedTCPUpgradeTime is the expected time by which security upgrade will complete
expectedTCPUpgradeTime time.Time
}
// dialWorker synchronises concurrent dials to a peer. It ensures that we make at most one dial to a
// peer's address
type dialWorker struct {
s *Swarm
peer peer.ID
// reqch is used to send dial requests to the worker. close reqch to end the worker loop
reqch <-chan dialRequest
// pendingRequests is the set of pendingRequests
pendingRequests map[*pendRequest]struct{}
// trackedDials tracks dials to the peer's addresses. An entry here is used to ensure that
// we dial an address at most once
trackedDials map[string]*addrDial
// resch is used to receive responses for dials to the peer's addresses.
resch chan tpt.DialUpdate
connected bool // true when a connection has been successfully established
// for testing
wg sync.WaitGroup
cl Clock
}
func newDialWorker(s *Swarm, p peer.ID, reqch <-chan dialRequest, cl Clock) *dialWorker {
if cl == nil {
cl = RealClock{}
}
return &dialWorker{
s: s,
peer: p,
reqch: reqch,
pendingRequests: make(map[*pendRequest]struct{}),
trackedDials: make(map[string]*addrDial),
resch: make(chan tpt.DialUpdate),
cl: cl,
}
}
// loop implements the core dial worker loop. Requests are received on w.reqch.
// The loop exits when w.reqch is closed.
func (w *dialWorker) loop() {
w.wg.Add(1)
defer w.wg.Done()
defer w.s.limiter.clearAllPeerDials(w.peer)
// dq is used to pace dials to different addresses of the peer
dq := newDialQueue()
// dialsInFlight is the number of dials in flight.
dialsInFlight := 0
startTime := w.cl.Now()
// dialTimer is the dialTimer used to trigger dials
dialTimer := w.cl.InstantTimer(startTime.Add(math.MaxInt64))
timerRunning := true
// scheduleNextDial updates timer for triggering the next dial
scheduleNextDial := func() {
if timerRunning && !dialTimer.Stop() {
<-dialTimer.Ch()
}
timerRunning = false
if dq.Len() > 0 {
if dialsInFlight == 0 && !w.connected {
// if there are no dials in flight, trigger the next dials immediately
dialTimer.Reset(startTime)
} else {
resetTime := startTime.Add(dq.top().Delay)
for _, ad := range w.trackedDials {
if !ad.expectedTCPUpgradeTime.IsZero() && ad.expectedTCPUpgradeTime.After(resetTime) {
resetTime = ad.expectedTCPUpgradeTime
}
}
dialTimer.Reset(resetTime)
}
timerRunning = true
}
}
// totalDials is used to track number of dials made by this worker for metrics
totalDials := 0
loop:
for {
// The loop has three parts
// 1. Input requests are received on w.reqch. If a suitable connection is not available we create
// a pendRequest object to track the dialRequest and add the addresses to dq.
// 2. Addresses from the dialQueue are dialed at appropriate time intervals depending on delay logic.
// We are notified of the completion of these dials on w.resch.
// 3. Responses for dials are received on w.resch. On receiving a response, we update the pendRequests
// interested in dials on this address.
select {
case req, ok := <-w.reqch:
if !ok {
if w.s.metricsTracer != nil {
w.s.metricsTracer.DialCompleted(w.connected, totalDials)
}
return
}
// We have received a new request. If we do not have a suitable connection,
// track this dialRequest with a pendRequest.
// Enqueue the peer's addresses relevant to this request in dq and
// track dials to the addresses relevant to this request.
c := w.s.bestAcceptableConnToPeer(req.ctx, w.peer)
if c != nil {
req.resch <- dialResponse{conn: c}
continue loop
}
addrs, addrErrs, err := w.s.addrsForDial(req.ctx, w.peer)
if err != nil {
req.resch <- dialResponse{
err: &DialError{
Peer: w.peer,
DialErrors: addrErrs,
Cause: err,
}}
continue loop
}
// get the delays to dial these addrs from the swarms dialRanker
simConnect, _, _ := network.GetSimultaneousConnect(req.ctx)
addrRanking := w.rankAddrs(addrs, simConnect)
addrDelay := make(map[string]time.Duration, len(addrRanking))
// create the pending request object
pr := &pendRequest{
req: req,
addrs: make(map[string]struct{}, len(addrRanking)),
err: &DialError{Peer: w.peer, DialErrors: addrErrs},
}
for _, adelay := range addrRanking {
pr.addrs[string(adelay.Addr.Bytes())] = struct{}{}
addrDelay[string(adelay.Addr.Bytes())] = adelay.Delay
}
// Check if dials to any of the addrs have completed already
// If they have errored, record the error in pr. If they have succeeded,
// respond with the connection.
// If they are pending, add them to tojoin.
// If we haven't seen any of the addresses before, add them to todial.
var todial []ma.Multiaddr
var tojoin []*addrDial
for _, adelay := range addrRanking {
ad, ok := w.trackedDials[string(adelay.Addr.Bytes())]
if !ok {
todial = append(todial, adelay.Addr)
continue
}
if ad.conn != nil {
// dial to this addr was successful, complete the request
req.resch <- dialResponse{conn: ad.conn}
continue loop
}
if ad.err != nil {
// dial to this addr errored, accumulate the error
pr.err.recordErr(ad.addr, ad.err)
delete(pr.addrs, string(ad.addr.Bytes()))
continue
}
// dial is still pending, add to the join list
tojoin = append(tojoin, ad)
}
if len(todial) == 0 && len(tojoin) == 0 {
// all request applicable addrs have been dialed, we must have errored
pr.err.Cause = ErrAllDialsFailed
req.resch <- dialResponse{err: pr.err}
continue loop
}
// The request has some pending or new dials
w.pendingRequests[pr] = struct{}{}
for _, ad := range tojoin {
if !ad.dialed {
// we haven't dialed this address. update the ad.ctx to have simultaneous connect values
// set correctly
if simConnect, isClient, reason := network.GetSimultaneousConnect(req.ctx); simConnect {
if simConnect, _, _ := network.GetSimultaneousConnect(ad.ctx); !simConnect {
ad.ctx = network.WithSimultaneousConnect(ad.ctx, isClient, reason)
// update the element in dq to use the simultaneous connect delay.
dq.Add(network.AddrDelay{
Addr: ad.addr,
Delay: addrDelay[string(ad.addr.Bytes())],
})
}
}
}
// add the request to the addrDial
}
if len(todial) > 0 {
now := time.Now()
// these are new addresses, track them and add them to dq
for _, a := range todial {
w.trackedDials[string(a.Bytes())] = &addrDial{
addr: a,
ctx: req.ctx,
createdAt: now,
}
dq.Add(network.AddrDelay{Addr: a, Delay: addrDelay[string(a.Bytes())]})
}
}
// setup dialTimer for updates to dq
scheduleNextDial()
case <-dialTimer.Ch():
// It's time to dial the next batch of addresses.
// We don't check the delay of the addresses received from the queue here
// because if the timer triggered before the delay, it means that all
// the inflight dials have errored and we should dial the next batch of
// addresses
now := time.Now()
for _, adelay := range dq.NextBatch() {
// spawn the dial
ad, ok := w.trackedDials[string(adelay.Addr.Bytes())]
if !ok {
log.Errorf("SWARM BUG: no entry for address %s in trackedDials", adelay.Addr)
continue
}
ad.dialed = true
ad.dialRankingDelay = now.Sub(ad.createdAt)
err := w.s.dialNextAddr(ad.ctx, w.peer, ad.addr, w.resch)
if err != nil {
// Errored without attempting a dial. This happens in case of
// backoff or black hole.
w.dispatchError(ad, err)
} else {
// the dial was started successfully. update the inflight dial count
dialsInFlight++
totalDials++
}
}
timerRunning = false
// schedule more dials
scheduleNextDial()
case res := <-w.resch:
// A dial to an address has completed.
// Update all requests waiting on this address. On success, complete the request.
// On error, record the error
ad, ok := w.trackedDials[string(res.Addr.Bytes())]
if !ok {
log.Errorf("SWARM BUG: no entry for address %s in trackedDials", res.Addr)
if res.Conn != nil {
res.Conn.Close()
}
dialsInFlight--
continue
}
// TCP Connection has been established. Wait for connection upgrade on this address
// before making new dials.
if res.Kind == tpt.UpdateKindHandshakeProgressed {
// Only wait for public addresses to complete dialing since private dials
// are quick anyway
if manet.IsPublicAddr(res.Addr) {
ad.expectedTCPUpgradeTime = w.cl.Now().Add(PublicTCPDelay)
}
scheduleNextDial()
continue
}
dialsInFlight--
ad.expectedTCPUpgradeTime = time.Time{}
if res.Conn != nil {
// we got a connection, add it to the swarm
conn, err := w.s.addConn(res.Conn, network.DirOutbound)
if err != nil {
// oops no, we failed to add it to the swarm
res.Conn.Close()
w.dispatchError(ad, err)
continue loop
}
for pr := range w.pendingRequests {
if _, ok := pr.addrs[string(ad.addr.Bytes())]; ok {
pr.req.resch <- dialResponse{conn: conn}
delete(w.pendingRequests, pr)
}
}
ad.conn = conn
if !w.connected {
w.connected = true
if w.s.metricsTracer != nil {
w.s.metricsTracer.DialRankingDelay(ad.dialRankingDelay)
}
}
continue loop
}
// it must be an error -- add backoff if applicable and dispatch
// ErrDialRefusedBlackHole shouldn't end up here, just a safety check
if res.Err != ErrDialRefusedBlackHole && res.Err != context.Canceled && !w.connected {
// we only add backoff if there has not been a successful connection
// for consistency with the old dialer behavior.
w.s.backf.AddBackoff(w.peer, res.Addr)
} else if res.Err == ErrDialRefusedBlackHole {
log.Errorf("SWARM BUG: unexpected ErrDialRefusedBlackHole while dialing peer %s to addr %s",
w.peer, res.Addr)
}
w.dispatchError(ad, res.Err)
// Only schedule next dial on error.
// If we scheduleNextDial on success, we will end up making one dial more than
// required because the final successful dial will spawn one more dial
scheduleNextDial()
}
}
}
// dispatches an error to a specific addr dial
func (w *dialWorker) dispatchError(ad *addrDial, err error) {
ad.err = err
for pr := range w.pendingRequests {
// accumulate the error
if _, ok := pr.addrs[string(ad.addr.Bytes())]; ok {
pr.err.recordErr(ad.addr, err)
delete(pr.addrs, string(ad.addr.Bytes()))
if len(pr.addrs) == 0 {
// all addrs have errored, dispatch the dial error,
// but first do one last check in case an acceptable connection has landed from
// a simultaneous dial that started later and added new acceptable addrs
c := w.s.bestAcceptableConnToPeer(pr.req.ctx, w.peer)
if c != nil {
pr.req.resch <- dialResponse{conn: c}
} else {
pr.err.Cause = ErrAllDialsFailed
pr.req.resch <- dialResponse{err: pr.err}
}
delete(w.pendingRequests, pr)
}
}
}
// if it was a backoff, clear the address dial so that it doesn't inhibit new dial requests.
// this is necessary to support active listen scenarios, where a new dial comes in while
// another dial is in progress, and needs to do a direct connection without inhibitions from
// dial backoff.
if err == ErrDialBackoff {
delete(w.trackedDials, string(ad.addr.Bytes()))
}
}
// rankAddrs ranks addresses for dialing. If it's a simConnect request, we
// dial all addresses immediately without any delay
func (w *dialWorker) rankAddrs(addrs []ma.Multiaddr, isSimConnect bool) []network.AddrDelay {
if isSimConnect {
return NoDelayDialRanker(addrs)
}
return w.s.dialRanker(addrs)
}
// dialQueue is a priority queue used to schedule dials
type dialQueue struct {
// q contains dials ordered by delay
q []network.AddrDelay
}
// newDialQueue returns a new dialQueue
func newDialQueue() *dialQueue {
return &dialQueue{q: make([]network.AddrDelay, 0, 16)}
}
// Add adds adelay to the queue. If another element exists in the queue with
// the same address, it replaces that element.
func (dq *dialQueue) Add(adelay network.AddrDelay) {
for i := 0; i < dq.Len(); i++ {
if dq.q[i].Addr.Equal(adelay.Addr) {
if dq.q[i].Delay == adelay.Delay {
// existing element is the same. nothing to do
return
}
// remove the element
copy(dq.q[i:], dq.q[i+1:])
dq.q = dq.q[:len(dq.q)-1]
break
}
}
for i := 0; i < dq.Len(); i++ {
if dq.q[i].Delay > adelay.Delay {
dq.q = append(dq.q, network.AddrDelay{}) // extend the slice
copy(dq.q[i+1:], dq.q[i:])
dq.q[i] = adelay
return
}
}
dq.q = append(dq.q, adelay)
}
// NextBatch returns all the elements in the queue with the highest priority
func (dq *dialQueue) NextBatch() []network.AddrDelay {
if dq.Len() == 0 {
return nil
}
// i is the index of the second highest priority element
var i int
for i = 0; i < dq.Len(); i++ {
if dq.q[i].Delay != dq.q[0].Delay {
break
}
}
res := dq.q[:i]
dq.q = dq.q[i:]
return res
}
// top returns the top element of the queue
func (dq *dialQueue) top() network.AddrDelay {
return dq.q[0]
}
// Len returns the number of elements in the queue
func (dq *dialQueue) Len() int {
return len(dq.q)
}
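
The dialQueue keeps addresses ordered by delay, Add replaces an existing entry for the same address, and NextBatch pops every address sharing the smallest delay, which is exactly the batch the worker dials when its timer fires. A package-internal sketch (illustrative):

package swarm

import (
	"fmt"
	"time"

	"github.com/libp2p/go-libp2p/core/network"
	ma "github.com/multiformats/go-multiaddr"
)

// exampleDialQueue is an illustrative sketch, not part of the library.
func exampleDialQueue() {
	dq := newDialQueue()
	quic := ma.StringCast("/ip4/203.0.113.7/udp/4001/quic-v1")
	tcp := ma.StringCast("/ip4/203.0.113.7/tcp/4001")
	dq.Add(network.AddrDelay{Addr: quic, Delay: 0})
	dq.Add(network.AddrDelay{Addr: tcp, Delay: 250 * time.Millisecond})

	// Re-adding an address with a different delay replaces its existing entry.
	dq.Add(network.AddrDelay{Addr: tcp, Delay: 0})

	batch := dq.NextBatch()           // both addresses now share delay 0
	fmt.Println(len(batch), dq.Len()) // 2 0
}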


@@ -0,0 +1,225 @@
package swarm
import (
"context"
"os"
"strconv"
"sync"
"time"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/libp2p/go-libp2p/core/transport"
ma "github.com/multiformats/go-multiaddr"
)
type dialJob struct {
addr ma.Multiaddr
peer peer.ID
ctx context.Context
resp chan transport.DialUpdate
timeout time.Duration
}
func (dj *dialJob) cancelled() bool {
return dj.ctx.Err() != nil
}
type dialLimiter struct {
lk sync.Mutex
fdConsuming int
fdLimit int
waitingOnFd []*dialJob
dialFunc dialfunc
activePerPeer map[peer.ID]int
perPeerLimit int
waitingOnPeerLimit map[peer.ID][]*dialJob
}
type dialfunc func(context.Context, peer.ID, ma.Multiaddr, chan<- transport.DialUpdate) (transport.CapableConn, error)
func newDialLimiter(df dialfunc) *dialLimiter {
fd := ConcurrentFdDials
if env := os.Getenv("LIBP2P_SWARM_FD_LIMIT"); env != "" {
if n, err := strconv.ParseInt(env, 10, 32); err == nil {
fd = int(n)
}
}
return newDialLimiterWithParams(df, fd, DefaultPerPeerRateLimit)
}
func newDialLimiterWithParams(df dialfunc, fdLimit, perPeerLimit int) *dialLimiter {
return &dialLimiter{
fdLimit: fdLimit,
perPeerLimit: perPeerLimit,
waitingOnPeerLimit: make(map[peer.ID][]*dialJob),
activePerPeer: make(map[peer.ID]int),
dialFunc: df,
}
}
// freeFDToken frees an FD token and, if any dials are waiting, schedules another waiting
// dialJob in its place
func (dl *dialLimiter) freeFDToken() {
log.Debugf("[limiter] freeing FD token; waiting: %d; consuming: %d", len(dl.waitingOnFd), dl.fdConsuming)
dl.fdConsuming--
for len(dl.waitingOnFd) > 0 {
next := dl.waitingOnFd[0]
dl.waitingOnFd[0] = nil // clear out memory
dl.waitingOnFd = dl.waitingOnFd[1:]
if len(dl.waitingOnFd) == 0 {
// clear out memory.
dl.waitingOnFd = nil
}
// Skip over canceled dials instead of queuing up a goroutine.
if next.cancelled() {
dl.freePeerToken(next)
continue
}
dl.fdConsuming++
// we already have activePerPeer token at this point so we can just dial
go dl.executeDial(next)
return
}
}
func (dl *dialLimiter) freePeerToken(dj *dialJob) {
log.Debugf("[limiter] freeing peer token; peer %s; addr: %s; active for peer: %d; waiting on peer limit: %d",
dj.peer, dj.addr, dl.activePerPeer[dj.peer], len(dl.waitingOnPeerLimit[dj.peer]))
// release tokens in reverse order than we take them
dl.activePerPeer[dj.peer]--
if dl.activePerPeer[dj.peer] == 0 {
delete(dl.activePerPeer, dj.peer)
}
waitlist := dl.waitingOnPeerLimit[dj.peer]
for len(waitlist) > 0 {
next := waitlist[0]
waitlist[0] = nil // clear out memory
waitlist = waitlist[1:]
if len(waitlist) == 0 {
delete(dl.waitingOnPeerLimit, next.peer)
} else {
dl.waitingOnPeerLimit[next.peer] = waitlist
}
if next.cancelled() {
continue
}
dl.activePerPeer[next.peer]++ // just kidding, we still want this token
dl.addCheckFdLimit(next)
return
}
}
func (dl *dialLimiter) finishedDial(dj *dialJob) {
dl.lk.Lock()
defer dl.lk.Unlock()
if dl.shouldConsumeFd(dj.addr) {
dl.freeFDToken()
}
dl.freePeerToken(dj)
}
func (dl *dialLimiter) shouldConsumeFd(addr ma.Multiaddr) bool {
// we don't consume FD's for relay addresses for now as they will be consumed when the Relay Transport
// actually dials the Relay server. That dial call will also pass through this limiter with
// the address of the relay server i.e. non-relay address.
_, err := addr.ValueForProtocol(ma.P_CIRCUIT)
isRelay := err == nil
return !isRelay && isFdConsumingAddr(addr)
}
func (dl *dialLimiter) addCheckFdLimit(dj *dialJob) {
if dl.shouldConsumeFd(dj.addr) {
if dl.fdConsuming >= dl.fdLimit {
log.Debugf("[limiter] blocked dial waiting on FD token; peer: %s; addr: %s; consuming: %d; "+
"limit: %d; waiting: %d", dj.peer, dj.addr, dl.fdConsuming, dl.fdLimit, len(dl.waitingOnFd))
dl.waitingOnFd = append(dl.waitingOnFd, dj)
return
}
log.Debugf("[limiter] taking FD token: peer: %s; addr: %s; prev consuming: %d",
dj.peer, dj.addr, dl.fdConsuming)
// take token
dl.fdConsuming++
}
log.Debugf("[limiter] executing dial; peer: %s; addr: %s; FD consuming: %d; waiting: %d",
dj.peer, dj.addr, dl.fdConsuming, len(dl.waitingOnFd))
go dl.executeDial(dj)
}
func (dl *dialLimiter) addCheckPeerLimit(dj *dialJob) {
if dl.activePerPeer[dj.peer] >= dl.perPeerLimit {
log.Debugf("[limiter] blocked dial waiting on peer limit; peer: %s; addr: %s; active: %d; "+
"peer limit: %d; waiting: %d", dj.peer, dj.addr, dl.activePerPeer[dj.peer], dl.perPeerLimit,
len(dl.waitingOnPeerLimit[dj.peer]))
wlist := dl.waitingOnPeerLimit[dj.peer]
dl.waitingOnPeerLimit[dj.peer] = append(wlist, dj)
return
}
dl.activePerPeer[dj.peer]++
dl.addCheckFdLimit(dj)
}
// AddDialJob tries to take the needed tokens for starting the given dial job.
// If it acquires all needed tokens, it immediately starts the dial, otherwise
// it will put it on the waitlist for the requested token.
func (dl *dialLimiter) AddDialJob(dj *dialJob) {
dl.lk.Lock()
defer dl.lk.Unlock()
log.Debugf("[limiter] adding a dial job through limiter: %v", dj.addr)
dl.addCheckPeerLimit(dj)
}
func (dl *dialLimiter) clearAllPeerDials(p peer.ID) {
dl.lk.Lock()
defer dl.lk.Unlock()
delete(dl.waitingOnPeerLimit, p)
log.Debugf("[limiter] clearing all peer dials: %v", p)
// NB: the waitingOnFd list doesn't need to be cleaned out here, we will
// remove them as we encounter them because they are 'cancelled' at this
// point
}
// executeDial calls the dialFunc, and reports the result through the response
// channel when finished. Once the response is sent it also releases all tokens
// it held during the dial.
func (dl *dialLimiter) executeDial(j *dialJob) {
defer dl.finishedDial(j)
if j.cancelled() {
return
}
dctx, cancel := context.WithTimeout(j.ctx, j.timeout)
defer cancel()
con, err := dl.dialFunc(dctx, j.peer, j.addr, j.resp)
kind := transport.UpdateKindDialSuccessful
if err != nil {
kind = transport.UpdateKindDialFailed
}
select {
case j.resp <- transport.DialUpdate{Kind: kind, Conn: con, Addr: j.addr, Err: err}:
case <-j.ctx.Done():
if con != nil {
con.Close()
}
}
}
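
The limiter gates each dial on two tokens: a per-peer concurrency token and, for FD-consuming addresses such as TCP, a global FD token whose limit can be overridden via the LIBP2P_SWARM_FD_LIMIT environment variable. A package-internal sketch of the per-peer limit with a stub dial function (illustrative only):

package swarm

import (
	"context"
	"errors"
	"fmt"
	"time"

	"github.com/libp2p/go-libp2p/core/peer"
	"github.com/libp2p/go-libp2p/core/transport"
	ma "github.com/multiformats/go-multiaddr"
)

// exampleDialLimiter: with a per-peer limit of 1, the second job to the same
// peer waits until the first dial has finished. Illustrative, not library code.
func exampleDialLimiter() {
	stub := func(ctx context.Context, p peer.ID, a ma.Multiaddr, _ chan<- transport.DialUpdate) (transport.CapableConn, error) {
		return nil, errors.New("stub dial failed")
	}
	dl := newDialLimiterWithParams(stub, 1 /* fdLimit */, 1 /* perPeerLimit */)

	p := peer.ID("example-peer")
	resp := make(chan transport.DialUpdate, 2)
	for _, s := range []string{"/ip4/203.0.113.9/tcp/4001", "/ip4/203.0.113.9/tcp/4002"} {
		dl.AddDialJob(&dialJob{
			ctx:     context.Background(),
			peer:    p,
			addr:    ma.StringCast(s),
			resp:    resp,
			timeout: time.Second,
		})
	}
	for i := 0; i < 2; i++ {
		res := <-resp
		fmt.Println(res.Addr, res.Err)
	}
}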


@@ -0,0 +1,821 @@
package swarm
import (
"context"
"errors"
"fmt"
"io"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/libp2p/go-libp2p/core/connmgr"
"github.com/libp2p/go-libp2p/core/event"
"github.com/libp2p/go-libp2p/core/metrics"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/libp2p/go-libp2p/core/peerstore"
"github.com/libp2p/go-libp2p/core/transport"
"golang.org/x/exp/slices"
logging "github.com/ipfs/go-log/v2"
ma "github.com/multiformats/go-multiaddr"
madns "github.com/multiformats/go-multiaddr-dns"
)
const (
defaultDialTimeout = 15 * time.Second
// defaultDialTimeoutLocal is the maximum duration a Dial to a local network address
// is allowed to take.
// This includes the time between dialing the raw network connection,
// protocol selection as well the handshake, if applicable.
defaultDialTimeoutLocal = 5 * time.Second
)
var log = logging.Logger("swarm2")
// ErrSwarmClosed is returned when one attempts to operate on a closed swarm.
var ErrSwarmClosed = errors.New("swarm closed")
// ErrAddrFiltered is returned when trying to register a connection to a
// filtered address. You shouldn't see this error unless some underlying
// transport is misbehaving.
var ErrAddrFiltered = errors.New("address filtered")
// ErrDialTimeout is returned when a dial times out due to the global timeout
var ErrDialTimeout = errors.New("dial timed out")
type Option func(*Swarm) error
// WithConnectionGater sets a connection gater
func WithConnectionGater(gater connmgr.ConnectionGater) Option {
return func(s *Swarm) error {
s.gater = gater
return nil
}
}
// WithMultiaddrResolver sets a custom multiaddress resolver
func WithMultiaddrResolver(maResolver *madns.Resolver) Option {
return func(s *Swarm) error {
s.maResolver = maResolver
return nil
}
}
// WithMetrics sets a metrics reporter
func WithMetrics(reporter metrics.Reporter) Option {
return func(s *Swarm) error {
s.bwc = reporter
return nil
}
}
func WithMetricsTracer(t MetricsTracer) Option {
return func(s *Swarm) error {
s.metricsTracer = t
return nil
}
}
func WithDialTimeout(t time.Duration) Option {
return func(s *Swarm) error {
s.dialTimeout = t
return nil
}
}
func WithDialTimeoutLocal(t time.Duration) Option {
return func(s *Swarm) error {
s.dialTimeoutLocal = t
return nil
}
}
func WithResourceManager(m network.ResourceManager) Option {
return func(s *Swarm) error {
s.rcmgr = m
return nil
}
}
// WithDialRanker configures swarm to use d as the DialRanker
func WithDialRanker(d network.DialRanker) Option {
return func(s *Swarm) error {
if d == nil {
return errors.New("swarm: dial ranker cannot be nil")
}
s.dialRanker = d
return nil
}
}
// WithUDPBlackHoleConfig configures the swarm's UDP black hole detection.
// n is the size of the sliding window used to evaluate black hole state
// min is the minimum number of successes out of n required to not block requests
func WithUDPBlackHoleConfig(enabled bool, n, min int) Option {
return func(s *Swarm) error {
s.udpBlackHoleConfig = blackHoleConfig{Enabled: enabled, N: n, MinSuccesses: min}
return nil
}
}
// WithIPv6BlackHoleConfig configures the swarm's IPv6 black hole detection.
// n is the size of the sliding window used to evaluate black hole state
// min is the minimum number of successes out of n required to not block requests
func WithIPv6BlackHoleConfig(enabled bool, n, min int) Option {
return func(s *Swarm) error {
s.ipv6BlackHoleConfig = blackHoleConfig{Enabled: enabled, N: n, MinSuccesses: min}
return nil
}
}
// Swarm is a connection muxer, allowing connections to other peers to
// be opened and closed, while still using the same Chan for all
// communication. The Chan sends/receives Messages, which note the
// destination or source Peer.
type Swarm struct {
nextConnID atomic.Uint64
nextStreamID atomic.Uint64
// Close refcount. This allows us to fully wait for the swarm to be torn
// down before continuing.
refs sync.WaitGroup
emitter event.Emitter
rcmgr network.ResourceManager
local peer.ID
peers peerstore.Peerstore
dialTimeout time.Duration
dialTimeoutLocal time.Duration
conns struct {
sync.RWMutex
m map[peer.ID][]*Conn
}
listeners struct {
sync.RWMutex
ifaceListenAddres []ma.Multiaddr
cacheEOL time.Time
m map[transport.Listener]struct{}
}
notifs struct {
sync.RWMutex
m map[network.Notifiee]struct{}
}
directConnNotifs struct {
sync.Mutex
m map[peer.ID][]chan struct{}
}
transports struct {
sync.RWMutex
m map[int]transport.Transport
}
maResolver *madns.Resolver
// stream handlers
streamh atomic.Pointer[network.StreamHandler]
// dialing helpers
dsync *dialSync
backf DialBackoff
limiter *dialLimiter
gater connmgr.ConnectionGater
closeOnce sync.Once
ctx context.Context // is canceled when Close is called
ctxCancel context.CancelFunc
bwc metrics.Reporter
metricsTracer MetricsTracer
dialRanker network.DialRanker
udpBlackHoleConfig blackHoleConfig
ipv6BlackHoleConfig blackHoleConfig
bhd *blackHoleDetector
}
// NewSwarm constructs a Swarm.
func NewSwarm(local peer.ID, peers peerstore.Peerstore, eventBus event.Bus, opts ...Option) (*Swarm, error) {
emitter, err := eventBus.Emitter(new(event.EvtPeerConnectednessChanged))
if err != nil {
return nil, err
}
ctx, cancel := context.WithCancel(context.Background())
s := &Swarm{
local: local,
peers: peers,
emitter: emitter,
ctx: ctx,
ctxCancel: cancel,
dialTimeout: defaultDialTimeout,
dialTimeoutLocal: defaultDialTimeoutLocal,
maResolver: madns.DefaultResolver,
dialRanker: DefaultDialRanker,
// A black hole is a binary property. On a network where UDP dials are blocked or there is
// no IPv6 connectivity, all dials will fail. So a low success rate of 5 out of 100 dials
// is good enough.
udpBlackHoleConfig: blackHoleConfig{Enabled: true, N: 100, MinSuccesses: 5},
ipv6BlackHoleConfig: blackHoleConfig{Enabled: true, N: 100, MinSuccesses: 5},
}
s.conns.m = make(map[peer.ID][]*Conn)
s.listeners.m = make(map[transport.Listener]struct{})
s.transports.m = make(map[int]transport.Transport)
s.notifs.m = make(map[network.Notifiee]struct{})
s.directConnNotifs.m = make(map[peer.ID][]chan struct{})
for _, opt := range opts {
if err := opt(s); err != nil {
return nil, err
}
}
if s.rcmgr == nil {
s.rcmgr = &network.NullResourceManager{}
}
s.dsync = newDialSync(s.dialWorkerLoop)
s.limiter = newDialLimiter(s.dialAddr)
s.backf.init(s.ctx)
s.bhd = newBlackHoleDetector(s.udpBlackHoleConfig, s.ipv6BlackHoleConfig, s.metricsTracer)
return s, nil
}
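
A construction sketch showing how the options above plug into NewSwarm, assuming the upstream go-libp2p module path together with its pstoremem and eventbus helper packages (error handling kept minimal):

package main

import (
	"fmt"
	"time"

	"github.com/libp2p/go-libp2p/core/crypto"
	"github.com/libp2p/go-libp2p/core/peer"
	"github.com/libp2p/go-libp2p/p2p/host/eventbus"
	"github.com/libp2p/go-libp2p/p2p/host/peerstore/pstoremem"
	"github.com/libp2p/go-libp2p/p2p/net/swarm"
)

func main() {
	// Generate a local identity and the supporting pieces NewSwarm needs.
	priv, _, err := crypto.GenerateKeyPair(crypto.Ed25519, -1)
	if err != nil {
		panic(err)
	}
	id, err := peer.IDFromPrivateKey(priv)
	if err != nil {
		panic(err)
	}
	ps, err := pstoremem.NewPeerstore()
	if err != nil {
		panic(err)
	}

	sw, err := swarm.NewSwarm(id, ps, eventbus.NewBus(),
		swarm.WithDialTimeout(10*time.Second),
		swarm.WithUDPBlackHoleConfig(true, 100, 5),
		swarm.WithIPv6BlackHoleConfig(true, 100, 5),
	)
	if err != nil {
		panic(err)
	}
	defer sw.Close()
	fmt.Println("swarm created for peer", id)
}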
func (s *Swarm) Close() error {
s.closeOnce.Do(s.close)
return nil
}
func (s *Swarm) close() {
s.ctxCancel()
s.emitter.Close()
// Prevents new connections and/or listeners from being added to the swarm.
s.listeners.Lock()
listeners := s.listeners.m
s.listeners.m = nil
s.listeners.Unlock()
s.conns.Lock()
conns := s.conns.m
s.conns.m = nil
s.conns.Unlock()
// Lots of goroutines but we might as well do this in parallel. We want to shut down as fast as
// possible.
for l := range listeners {
go func(l transport.Listener) {
if err := l.Close(); err != nil && err != transport.ErrListenerClosed {
log.Errorf("error when shutting down listener: %s", err)
}
}(l)
}
for _, cs := range conns {
for _, c := range cs {
go func(c *Conn) {
if err := c.Close(); err != nil {
log.Errorf("error when shutting down connection: %s", err)
}
}(c)
}
}
// Wait for everything to finish.
s.refs.Wait()
// Now close out any transports (if necessary). Do this after closing
// all connections/listeners.
s.transports.Lock()
transports := s.transports.m
s.transports.m = nil
s.transports.Unlock()
// Dedup transports that may be listening on multiple protocols
transportsToClose := make(map[transport.Transport]struct{}, len(transports))
for _, t := range transports {
transportsToClose[t] = struct{}{}
}
var wg sync.WaitGroup
for t := range transportsToClose {
if closer, ok := t.(io.Closer); ok {
wg.Add(1)
go func(c io.Closer) {
defer wg.Done()
if err := closer.Close(); err != nil {
log.Errorf("error when closing down transport %T: %s", c, err)
}
}(closer)
}
}
wg.Wait()
}
func (s *Swarm) addConn(tc transport.CapableConn, dir network.Direction) (*Conn, error) {
var (
p = tc.RemotePeer()
addr = tc.RemoteMultiaddr()
)
// create the Stat object, initializing with the underlying connection Stat if available
var stat network.ConnStats
if cs, ok := tc.(network.ConnStat); ok {
stat = cs.Stat()
}
stat.Direction = dir
stat.Opened = time.Now()
// Wrap and register the connection.
c := &Conn{
conn: tc,
swarm: s,
stat: stat,
id: s.nextConnID.Add(1),
}
// we ONLY check upgraded connections here so we can send them a Disconnect message.
// If we do this in the Upgrader, we will not be able to do this.
if s.gater != nil {
if allow, _ := s.gater.InterceptUpgraded(c); !allow {
// TODO Send disconnect with reason here
err := tc.Close()
if err != nil {
log.Warnf("failed to close connection with peer %s and addr %s; err: %s", p, addr, err)
}
return nil, ErrGaterDisallowedConnection
}
}
// Add the public key.
if pk := tc.RemotePublicKey(); pk != nil {
s.peers.AddPubKey(p, pk)
}
// Clear any backoffs
s.backf.Clear(p)
// Finally, add the peer.
s.conns.Lock()
// Check if we're still online
if s.conns.m == nil {
s.conns.Unlock()
tc.Close()
return nil, ErrSwarmClosed
}
c.streams.m = make(map[*Stream]struct{})
isFirstConnection := len(s.conns.m[p]) == 0
s.conns.m[p] = append(s.conns.m[p], c)
// Add two swarm refs:
// * One will be decremented after the close notifications fire in Conn.doClose
// * The other will be decremented when Conn.start exits.
s.refs.Add(2)
// Take the notification lock before releasing the conns lock to block
// Disconnect notifications until after the Connect notifications done.
c.notifyLk.Lock()
s.conns.Unlock()
// Notify goroutines waiting for a direct connection
if !c.Stat().Transient {
// Go routines interested in waiting for direct connection first acquire this lock
// and then acquire s.conns.RLock. Do not acquire this lock before conns.Unlock to
// prevent deadlock.
s.directConnNotifs.Lock()
for _, ch := range s.directConnNotifs.m[p] {
close(ch)
}
delete(s.directConnNotifs.m, p)
s.directConnNotifs.Unlock()
}
// Emit event after releasing `s.conns` lock so that a consumer can still
// use swarm methods that need the `s.conns` lock.
if isFirstConnection {
s.emitter.Emit(event.EvtPeerConnectednessChanged{
Peer: p,
Connectedness: network.Connected,
})
}
s.notifyAll(func(f network.Notifiee) {
f.Connected(s, c)
})
c.notifyLk.Unlock()
c.start()
return c, nil
}
// Peerstore returns this swarms internal Peerstore.
func (s *Swarm) Peerstore() peerstore.Peerstore {
return s.peers
}
// SetStreamHandler assigns the handler for new streams.
func (s *Swarm) SetStreamHandler(handler network.StreamHandler) {
s.streamh.Store(&handler)
}
// StreamHandler gets the handler for new streams.
func (s *Swarm) StreamHandler() network.StreamHandler {
handler := s.streamh.Load()
if handler == nil {
return nil
}
return *handler
}
// NewStream creates a new stream on any available connection to peer, dialing
// if necessary.
// Use network.WithUseTransient to open a stream over a transient (relayed)
// connection.
func (s *Swarm) NewStream(ctx context.Context, p peer.ID) (network.Stream, error) {
log.Debugf("[%s] opening stream to peer [%s]", s.local, p)
// Algorithm:
// 1. Find the best connection, otherwise, dial.
// 2. If the best connection is transient, wait for a direct conn via conn
// reversal or hole punching.
// 3. Try opening a stream.
// 4. If the underlying connection is, in fact, closed, close the outer
// connection and try again. We do this in case we have a closed
// connection but don't notice it until we actually try to open a
// stream.
//
// TODO: Try all connections even if we get an error opening a stream on
// a non-closed connection.
numDials := 0
for {
c := s.bestConnToPeer(p)
if c == nil {
if nodial, _ := network.GetNoDial(ctx); !nodial {
numDials++
if numDials > DialAttempts {
return nil, errors.New("max dial attempts exceeded")
}
var err error
c, err = s.dialPeer(ctx, p)
if err != nil {
return nil, err
}
} else {
return nil, network.ErrNoConn
}
}
useTransient, _ := network.GetUseTransient(ctx)
if !useTransient && c.Stat().Transient {
var err error
c, err = s.waitForDirectConn(ctx, p)
if err != nil {
return nil, err
}
}
str, err := c.NewStream(ctx)
if err != nil {
if c.conn.IsClosed() {
continue
}
return nil, err
}
return str, nil
}
}
// waitForDirectConn waits for a direct connection established through hole punching or connection reversal.
func (s *Swarm) waitForDirectConn(ctx context.Context, p peer.ID) (*Conn, error) {
s.directConnNotifs.Lock()
c := s.bestConnToPeer(p)
if c == nil {
s.directConnNotifs.Unlock()
return nil, network.ErrNoConn
} else if !c.Stat().Transient {
s.directConnNotifs.Unlock()
return c, nil
}
// Wait for transient connection to upgrade to a direct connection either by
// connection reversal or hole punching.
ch := make(chan struct{})
s.directConnNotifs.m[p] = append(s.directConnNotifs.m[p], ch)
s.directConnNotifs.Unlock()
// apply the DialPeer timeout
ctx, cancel := context.WithTimeout(ctx, network.GetDialPeerTimeout(ctx))
defer cancel()
// Wait for notification.
select {
case <-ctx.Done():
// Remove ourselves from the notification list
s.directConnNotifs.Lock()
defer s.directConnNotifs.Unlock()
s.directConnNotifs.m[p] = slices.DeleteFunc(
s.directConnNotifs.m[p],
func(c chan struct{}) bool { return c == ch },
)
if len(s.directConnNotifs.m[p]) == 0 {
delete(s.directConnNotifs.m, p)
}
return nil, ctx.Err()
case <-ch:
// We do not need to remove ourselves from the list here as the notifier
// clears the map entry
c := s.bestConnToPeer(p)
if c == nil {
return nil, network.ErrNoConn
}
if c.Stat().Transient {
return nil, network.ErrTransientConn
}
return c, nil
}
}
// ConnsToPeer returns all the live connections to peer.
func (s *Swarm) ConnsToPeer(p peer.ID) []network.Conn {
// TODO: Consider sorting the connection list best to worst. Currently,
// it's sorted oldest to newest.
s.conns.RLock()
defer s.conns.RUnlock()
conns := s.conns.m[p]
output := make([]network.Conn, len(conns))
for i, c := range conns {
output[i] = c
}
return output
}
func isBetterConn(a, b *Conn) bool {
// If one is transient and not the other, prefer the non-transient connection.
aTransient := a.Stat().Transient
bTransient := b.Stat().Transient
if aTransient != bTransient {
return !aTransient
}
// If one is direct and not the other, prefer the direct connection.
aDirect := isDirectConn(a)
bDirect := isDirectConn(b)
if aDirect != bDirect {
return aDirect
}
// Otherwise, prefer the connection with more open streams.
a.streams.Lock()
aLen := len(a.streams.m)
a.streams.Unlock()
b.streams.Lock()
bLen := len(b.streams.m)
b.streams.Unlock()
if aLen != bLen {
return aLen > bLen
}
// finally, pick the last connection.
return true
}
// bestConnToPeer returns the best connection to peer.
func (s *Swarm) bestConnToPeer(p peer.ID) *Conn {
// TODO: Prefer some transports over others.
// For now, prefers direct connections over Relayed connections.
// For tie-breaking, select the newest non-closed connection with the most streams.
s.conns.RLock()
defer s.conns.RUnlock()
var best *Conn
for _, c := range s.conns.m[p] {
if c.conn.IsClosed() {
// We *will* garbage collect this soon anyways.
continue
}
if best == nil || isBetterConn(c, best) {
best = c
}
}
return best
}
// bestAcceptableConnToPeer returns the best acceptable connection, considering the passed in ctx.
// If network.WithForceDirectDial is used, it only returns a direct connection, ignoring
// any transient (relayed) connections to the peer.
func (s *Swarm) bestAcceptableConnToPeer(ctx context.Context, p peer.ID) *Conn {
conn := s.bestConnToPeer(p)
forceDirect, _ := network.GetForceDirectDial(ctx)
if forceDirect && !isDirectConn(conn) {
return nil
}
return conn
}
func isDirectConn(c *Conn) bool {
return c != nil && !c.conn.Transport().Proxy()
}
// Connectedness returns our "connectedness" state with the given peer.
//
// To check if we have an open connection, use `s.Connectedness(p) ==
// network.Connected`.
func (s *Swarm) Connectedness(p peer.ID) network.Connectedness {
if s.bestConnToPeer(p) != nil {
return network.Connected
}
return network.NotConnected
}
// Conns returns a slice of all connections.
func (s *Swarm) Conns() []network.Conn {
s.conns.RLock()
defer s.conns.RUnlock()
conns := make([]network.Conn, 0, len(s.conns.m))
for _, cs := range s.conns.m {
for _, c := range cs {
conns = append(conns, c)
}
}
return conns
}
// ClosePeer closes all connections to the given peer.
func (s *Swarm) ClosePeer(p peer.ID) error {
conns := s.ConnsToPeer(p)
switch len(conns) {
case 0:
return nil
case 1:
return conns[0].Close()
default:
errCh := make(chan error)
for _, c := range conns {
go func(c network.Conn) {
errCh <- c.Close()
}(c)
}
var errs []string
for range conns {
err := <-errCh
if err != nil {
errs = append(errs, err.Error())
}
}
if len(errs) > 0 {
return fmt.Errorf("when disconnecting from peer %s: %s", p, strings.Join(errs, ", "))
}
return nil
}
}
// Peers returns a copy of the set of peers swarm is connected to.
func (s *Swarm) Peers() []peer.ID {
s.conns.RLock()
defer s.conns.RUnlock()
peers := make([]peer.ID, 0, len(s.conns.m))
for p := range s.conns.m {
peers = append(peers, p)
}
return peers
}
// LocalPeer returns the local peer swarm is associated to.
func (s *Swarm) LocalPeer() peer.ID {
return s.local
}
// Backoff returns the DialBackoff object for this swarm.
func (s *Swarm) Backoff() *DialBackoff {
return &s.backf
}
// notifyAll sends a signal to all Notifiees
func (s *Swarm) notifyAll(notify func(network.Notifiee)) {
s.notifs.RLock()
for f := range s.notifs.m {
notify(f)
}
s.notifs.RUnlock()
}
// Notify signs up Notifiee to receive signals when events happen
func (s *Swarm) Notify(f network.Notifiee) {
s.notifs.Lock()
s.notifs.m[f] = struct{}{}
s.notifs.Unlock()
}
// StopNotify unregisters Notifiee from receiving signals
func (s *Swarm) StopNotify(f network.Notifiee) {
s.notifs.Lock()
delete(s.notifs.m, f)
s.notifs.Unlock()
}
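// exampleNotify is an illustrative sketch of registering a Notifiee via the
// network.NotifyBundle helper and unregistering it later; the function name is
// a placeholder.
func exampleNotify(s *Swarm) (unregister func()) {
	nb := &network.NotifyBundle{
		ConnectedF: func(_ network.Network, c network.Conn) {
			log.Debugf("connected to %s", c.RemotePeer())
		},
		DisconnectedF: func(_ network.Network, c network.Conn) {
			log.Debugf("disconnected from %s", c.RemotePeer())
		},
	}
	s.Notify(nb)
	// Call the returned func to stop receiving notifications.
	return func() { s.StopNotify(nb) }
}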
func (s *Swarm) removeConn(c *Conn) {
p := c.RemotePeer()
s.conns.Lock()
cs := s.conns.m[p]
if len(cs) == 1 {
delete(s.conns.m, p)
s.conns.Unlock()
// Emit event after releasing `s.conns` lock so that a consumer can still
// use swarm methods that need the `s.conns` lock.
s.emitter.Emit(event.EvtPeerConnectednessChanged{
Peer: p,
Connectedness: network.NotConnected,
})
return
}
defer s.conns.Unlock()
for i, ci := range cs {
if ci == c {
// NOTE: We're intentionally preserving order.
// This way, connections to a peer are always
// sorted oldest to newest.
copy(cs[i:], cs[i+1:])
cs[len(cs)-1] = nil
s.conns.m[p] = cs[:len(cs)-1]
break
}
}
}
// String returns a string representation of Network.
func (s *Swarm) String() string {
return fmt.Sprintf("<Swarm %s>", s.LocalPeer())
}
func (s *Swarm) ResourceManager() network.ResourceManager {
return s.rcmgr
}
// Swarm is a Network.
var _ network.Network = (*Swarm)(nil)
var _ transport.TransportNetwork = (*Swarm)(nil)
type connWithMetrics struct {
transport.CapableConn
opened time.Time
dir network.Direction
metricsTracer MetricsTracer
}
func wrapWithMetrics(capableConn transport.CapableConn, metricsTracer MetricsTracer, opened time.Time, dir network.Direction) connWithMetrics {
c := connWithMetrics{CapableConn: capableConn, opened: opened, dir: dir, metricsTracer: metricsTracer}
c.metricsTracer.OpenedConnection(c.dir, capableConn.RemotePublicKey(), capableConn.ConnState(), capableConn.LocalMultiaddr())
return c
}
func (c connWithMetrics) completedHandshake() {
c.metricsTracer.CompletedHandshake(time.Since(c.opened), c.ConnState(), c.LocalMultiaddr())
}
func (c connWithMetrics) Close() error {
c.metricsTracer.ClosedConnection(c.dir, time.Since(c.opened), c.ConnState(), c.LocalMultiaddr())
return c.CapableConn.Close()
}
func (c connWithMetrics) Stat() network.ConnStats {
if cs, ok := c.CapableConn.(network.ConnStat); ok {
return cs.Stat()
}
return network.ConnStats{}
}
var _ network.ConnStat = connWithMetrics{}


@@ -0,0 +1,72 @@
package swarm
import (
"time"
manet "github.com/multiformats/go-multiaddr/net"
ma "github.com/multiformats/go-multiaddr"
)
// ListenAddresses returns a list of addresses at which this swarm listens.
func (s *Swarm) ListenAddresses() []ma.Multiaddr {
s.listeners.RLock()
defer s.listeners.RUnlock()
return s.listenAddressesNoLock()
}
func (s *Swarm) listenAddressesNoLock() []ma.Multiaddr {
addrs := make([]ma.Multiaddr, 0, len(s.listeners.m)+10) // A bit extra so we may avoid an extra allocation in the for loop below.
for l := range s.listeners.m {
addrs = append(addrs, l.Multiaddr())
}
return addrs
}
const ifaceAddrsCacheDuration = 1 * time.Minute
// InterfaceListenAddresses returns a list of addresses at which this swarm
// listens. It expands "any interface" addresses (/ip4/0.0.0.0, /ip6/::) to
// use the known local interfaces.
func (s *Swarm) InterfaceListenAddresses() ([]ma.Multiaddr, error) {
s.listeners.RLock() // RLock start
ifaceListenAddres := s.listeners.ifaceListenAddres
isEOL := time.Now().After(s.listeners.cacheEOL)
s.listeners.RUnlock() // RLock end
if !isEOL {
// Cache is valid, clone the slice
return append(ifaceListenAddres[:0:0], ifaceListenAddres...), nil
}
// Cache is not valid
// Perform double-checked locking
s.listeners.Lock() // Lock start
ifaceListenAddres = s.listeners.ifaceListenAddres
isEOL = time.Now().After(s.listeners.cacheEOL)
if isEOL {
// Cache is still invalid
listenAddres := s.listenAddressesNoLock()
if len(listenAddres) > 0 {
// We're actually listening on addresses.
var err error
ifaceListenAddres, err = manet.ResolveUnspecifiedAddresses(listenAddres, nil)
if err != nil {
s.listeners.Unlock() // Lock early exit
return nil, err
}
} else {
ifaceListenAddres = nil
}
s.listeners.ifaceListenAddres = ifaceListenAddres
s.listeners.cacheEOL = time.Now().Add(ifaceAddrsCacheDuration)
}
s.listeners.Unlock() // Lock end
return append(ifaceListenAddres[:0:0], ifaceListenAddres...), nil
}
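// exampleListenAddrs is an illustrative sketch contrasting the two accessors
// above: ListenAddresses returns addresses exactly as listened on (possibly
// unspecified, e.g. /ip4/0.0.0.0/...), while InterfaceListenAddresses expands
// them to concrete interface addresses. The function name is a placeholder.
func exampleListenAddrs(s *Swarm) {
	for _, a := range s.ListenAddresses() {
		log.Debugf("listen addr: %s", a)
	}
	ifaceAddrs, err := s.InterfaceListenAddresses()
	if err != nil {
		log.Debugf("resolving interface listen addrs: %s", err)
		return
	}
	for _, a := range ifaceAddrs {
		log.Debugf("interface listen addr: %s", a)
	}
}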


@@ -0,0 +1,268 @@
package swarm
import (
"context"
"errors"
"fmt"
"sync"
"time"
ic "github.com/libp2p/go-libp2p/core/crypto"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/libp2p/go-libp2p/core/transport"
ma "github.com/multiformats/go-multiaddr"
)
// TODO: Put this elsewhere.
// ErrConnClosed is returned when operating on a closed connection.
var ErrConnClosed = errors.New("connection closed")
// Conn is the connection type used by swarm. In general, you won't use this
// type directly.
type Conn struct {
id uint64
conn transport.CapableConn
swarm *Swarm
closeOnce sync.Once
err error
notifyLk sync.Mutex
streams struct {
sync.Mutex
m map[*Stream]struct{}
}
stat network.ConnStats
}
var _ network.Conn = &Conn{}
func (c *Conn) IsClosed() bool {
return c.conn.IsClosed()
}
func (c *Conn) ID() string {
// format: <first 10 chars of peer id>-<global conn ordinal>
return fmt.Sprintf("%s-%d", c.RemotePeer().String()[:10], c.id)
}
// Close closes this connection.
//
// Note: This method won't wait for the close notifications to finish as that
// would create a deadlock when called from an open notification (because all
// open notifications must finish before we can fire off the close
// notifications).
func (c *Conn) Close() error {
c.closeOnce.Do(c.doClose)
return c.err
}
func (c *Conn) doClose() {
c.swarm.removeConn(c)
// Prevent new streams from opening.
c.streams.Lock()
streams := c.streams.m
c.streams.m = nil
c.streams.Unlock()
c.err = c.conn.Close()
// This is just for cleaning up state. The connection has already been closed.
// We *could* optimize this but it really isn't worth it.
for s := range streams {
s.Reset()
}
// do this in a goroutine to avoid deadlocking if we call close in an open notification.
go func() {
// prevents us from issuing close notifications before finishing the open notifications
c.notifyLk.Lock()
defer c.notifyLk.Unlock()
c.swarm.notifyAll(func(f network.Notifiee) {
f.Disconnected(c.swarm, c)
})
c.swarm.refs.Done() // taken in Swarm.addConn
}()
}
func (c *Conn) removeStream(s *Stream) {
c.streams.Lock()
c.stat.NumStreams--
delete(c.streams.m, s)
c.streams.Unlock()
s.scope.Done()
}
// listens for new streams.
//
// The caller must take a swarm ref before calling. This function decrements the
// swarm ref count.
func (c *Conn) start() {
go func() {
defer c.swarm.refs.Done()
defer c.Close()
for {
ts, err := c.conn.AcceptStream()
if err != nil {
return
}
scope, err := c.swarm.ResourceManager().OpenStream(c.RemotePeer(), network.DirInbound)
if err != nil {
ts.Reset()
continue
}
c.swarm.refs.Add(1)
go func() {
s, err := c.addStream(ts, network.DirInbound, scope)
// Don't defer this. We don't want to block
// swarm shutdown on the connection handler.
c.swarm.refs.Done()
// We only get an error here when the swarm is closed or closing.
if err != nil {
scope.Done()
return
}
if h := c.swarm.StreamHandler(); h != nil {
h(s)
}
s.completeAcceptStreamGoroutine()
}()
}
}()
}
func (c *Conn) String() string {
return fmt.Sprintf(
"<swarm.Conn[%T] %s (%s) <-> %s (%s)>",
c.conn.Transport(),
c.conn.LocalMultiaddr(),
c.conn.LocalPeer(),
c.conn.RemoteMultiaddr(),
c.conn.RemotePeer(),
)
}
// LocalMultiaddr is the Multiaddr on this side
func (c *Conn) LocalMultiaddr() ma.Multiaddr {
return c.conn.LocalMultiaddr()
}
// LocalPeer is the Peer on our side of the connection
func (c *Conn) LocalPeer() peer.ID {
return c.conn.LocalPeer()
}
// RemoteMultiaddr is the Multiaddr on the remote side
func (c *Conn) RemoteMultiaddr() ma.Multiaddr {
return c.conn.RemoteMultiaddr()
}
// RemotePeer is the Peer on the remote side
func (c *Conn) RemotePeer() peer.ID {
return c.conn.RemotePeer()
}
// RemotePublicKey is the public key of the peer on the remote side
func (c *Conn) RemotePublicKey() ic.PubKey {
return c.conn.RemotePublicKey()
}
// ConnState is the security connection state, including early data result.
// Empty if not supported.
func (c *Conn) ConnState() network.ConnectionState {
return c.conn.ConnState()
}
// Stat returns metadata pertaining to this connection
func (c *Conn) Stat() network.ConnStats {
c.streams.Lock()
defer c.streams.Unlock()
return c.stat
}
// NewStream returns a new Stream from this connection
func (c *Conn) NewStream(ctx context.Context) (network.Stream, error) {
if c.Stat().Transient {
if useTransient, _ := network.GetUseTransient(ctx); !useTransient {
return nil, network.ErrTransientConn
}
}
scope, err := c.swarm.ResourceManager().OpenStream(c.RemotePeer(), network.DirOutbound)
if err != nil {
return nil, err
}
s, err := c.openAndAddStream(ctx, scope)
if err != nil {
scope.Done()
return nil, err
}
return s, nil
}
func (c *Conn) openAndAddStream(ctx context.Context, scope network.StreamManagementScope) (network.Stream, error) {
ts, err := c.conn.OpenStream(ctx)
if err != nil {
return nil, err
}
return c.addStream(ts, network.DirOutbound, scope)
}
func (c *Conn) addStream(ts network.MuxedStream, dir network.Direction, scope network.StreamManagementScope) (*Stream, error) {
c.streams.Lock()
// Are we still online?
if c.streams.m == nil {
c.streams.Unlock()
ts.Reset()
return nil, ErrConnClosed
}
// Wrap and register the stream.
s := &Stream{
stream: ts,
conn: c,
scope: scope,
stat: network.Stats{
Direction: dir,
Opened: time.Now(),
},
id: c.swarm.nextStreamID.Add(1),
acceptStreamGoroutineCompleted: dir != network.DirInbound,
}
c.stat.NumStreams++
c.streams.m[s] = struct{}{}
// Released once the stream disconnect notifications have finished
// firing (in Swarm.remove).
c.swarm.refs.Add(1)
c.streams.Unlock()
return s, nil
}
// GetStreams returns the streams associated with this connection.
func (c *Conn) GetStreams() []network.Stream {
c.streams.Lock()
defer c.streams.Unlock()
streams := make([]network.Stream, 0, len(c.streams.m))
for s := range c.streams.m {
streams = append(streams, s)
}
return streams
}
func (c *Conn) Scope() network.ConnScope {
return c.conn.Scope()
}


@@ -0,0 +1,679 @@
package swarm
import (
"context"
"errors"
"fmt"
"net/netip"
"strconv"
"sync"
"time"
"github.com/libp2p/go-libp2p/core/canonicallog"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/libp2p/go-libp2p/core/peerstore"
"github.com/libp2p/go-libp2p/core/transport"
ma "github.com/multiformats/go-multiaddr"
madns "github.com/multiformats/go-multiaddr-dns"
mafmt "github.com/multiformats/go-multiaddr-fmt"
manet "github.com/multiformats/go-multiaddr/net"
)
// The maximum number of address resolution steps we'll perform for a single
// peer (for all addresses).
const maxAddressResolution = 32
// Diagram of dial sync:
//
// many callers of Dial() synched w. dials many addrs results to callers
// ----------------------\ dialsync use earliest /--------------
// -----------------------\ |----------\ /----------------
// ------------------------>------------<------- >---------<-----------------
// -----------------------| \----x \----------------
// ----------------------| \-----x \---------------
// any may fail if no addr at end
// retry dialAttempt x
var (
// ErrDialBackoff is returned by the backoff code when a given peer has
// been dialed too frequently
ErrDialBackoff = errors.New("dial backoff")
// ErrDialRefusedBlackHole is returned when we are in a black holed environment
ErrDialRefusedBlackHole = errors.New("dial refused because of black hole")
// ErrDialToSelf is returned if we attempt to dial our own peer
ErrDialToSelf = errors.New("dial to self attempted")
// ErrNoTransport is returned when we don't know a transport for the
// given multiaddr.
ErrNoTransport = errors.New("no transport for protocol")
// ErrAllDialsFailed is returned when connecting to a peer has ultimately failed
ErrAllDialsFailed = errors.New("all dials failed")
// ErrNoAddresses is returned when we fail to find any addresses for a
// peer we're trying to dial.
ErrNoAddresses = errors.New("no addresses")
// ErrNoGoodAddresses is returned when we find addresses for a peer but
// can't use any of them.
ErrNoGoodAddresses = errors.New("no good addresses")
// ErrGaterDisallowedConnection is returned when the gater prevents us from
// forming a connection with a peer.
ErrGaterDisallowedConnection = errors.New("gater disallows connection to peer")
)
// ErrQUICDraft29 wraps ErrNoTransport and provides a more meaningful error message
var ErrQUICDraft29 errQUICDraft29
type errQUICDraft29 struct{}
func (errQUICDraft29) Error() string {
return "QUIC draft-29 has been removed, QUIC (RFC 9000) is accessible with /quic-v1"
}
func (errQUICDraft29) Unwrap() error {
return ErrNoTransport
}
// DialAttempts governs how many times a goroutine will try to dial a given peer.
// Note: this is down to one, as we have _too many dials_ atm. To add back in,
// add the loop back in Dial().
const DialAttempts = 1
// ConcurrentFdDials is the number of concurrent outbound dials over transports
// that consume file descriptors
const ConcurrentFdDials = 160
// DefaultPerPeerRateLimit is the number of concurrent outbound dials to make
// per peer
var DefaultPerPeerRateLimit = 8
// DialBackoff is a type for tracking peer dial backoffs. Dialbackoff is used to
// avoid over-dialing the same, dead peers. Whenever we totally time out on all
// addresses of a peer, we add the addresses to DialBackoff. Then, whenever we
// attempt to dial the peer again, we check each address for backoff. If it's on
// backoff, we don't dial the address and exit promptly. If a dial is
// successful, the peer and all its addresses are removed from backoff.
//
// * It's safe to use its zero value.
// * It's thread-safe.
// * It's *not* safe to move this type after using.
type DialBackoff struct {
entries map[peer.ID]map[string]*backoffAddr
lock sync.RWMutex
}
type backoffAddr struct {
tries int
until time.Time
}
func (db *DialBackoff) init(ctx context.Context) {
if db.entries == nil {
db.entries = make(map[peer.ID]map[string]*backoffAddr)
}
go db.background(ctx)
}
func (db *DialBackoff) background(ctx context.Context) {
ticker := time.NewTicker(BackoffMax)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
db.cleanup()
}
}
}
// Backoff returns whether the client should backoff from dialing
// peer p at address addr
func (db *DialBackoff) Backoff(p peer.ID, addr ma.Multiaddr) (backoff bool) {
db.lock.RLock()
defer db.lock.RUnlock()
ap, found := db.entries[p][string(addr.Bytes())]
return found && time.Now().Before(ap.until)
}
// BackoffBase is the base amount of time to backoff (default: 5s).
var BackoffBase = time.Second * 5
// BackoffCoef is the backoff coefficient (default: 1s).
var BackoffCoef = time.Second
// BackoffMax is the maximum backoff time (default: 5m).
var BackoffMax = time.Minute * 5
// AddBackoff adds peer's address to backoff.
//
// Backoff is not exponential, it's quadratic and computed according to the
// following formula:
//
// BackoffBase + BackoffCoef * PriorBackoffs^2
//
// Where PriorBackoffs is the number of previous backoffs.
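//
// For example, with the default values (BackoffBase = 5s, BackoffCoef = 1s,
// BackoffMax = 5m) successive calls for the same address yield:
//
//	1st AddBackoff: 5s              (PriorBackoffs = 0)
//	2nd AddBackoff: 5s + 1s*1 = 6s  (PriorBackoffs = 1)
//	3rd AddBackoff: 5s + 1s*4 = 9s  (PriorBackoffs = 2)
//	4th AddBackoff: 5s + 1s*9 = 14s, and so on, capped at BackoffMax.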
func (db *DialBackoff) AddBackoff(p peer.ID, addr ma.Multiaddr) {
saddr := string(addr.Bytes())
db.lock.Lock()
defer db.lock.Unlock()
bp, ok := db.entries[p]
if !ok {
bp = make(map[string]*backoffAddr, 1)
db.entries[p] = bp
}
ba, ok := bp[saddr]
if !ok {
bp[saddr] = &backoffAddr{
tries: 1,
until: time.Now().Add(BackoffBase),
}
return
}
backoffTime := BackoffBase + BackoffCoef*time.Duration(ba.tries*ba.tries)
if backoffTime > BackoffMax {
backoffTime = BackoffMax
}
ba.until = time.Now().Add(backoffTime)
ba.tries++
}
// Clear removes a backoff record. Clients should call this after a
// successful Dial.
func (db *DialBackoff) Clear(p peer.ID) {
db.lock.Lock()
defer db.lock.Unlock()
delete(db.entries, p)
}
func (db *DialBackoff) cleanup() {
db.lock.Lock()
defer db.lock.Unlock()
now := time.Now()
for p, e := range db.entries {
good := false
for _, backoff := range e {
backoffTime := BackoffBase + BackoffCoef*time.Duration(backoff.tries*backoff.tries)
if backoffTime > BackoffMax {
backoffTime = BackoffMax
}
if now.Before(backoff.until.Add(backoffTime)) {
good = true
break
}
}
if !good {
delete(db.entries, p)
}
}
}
// DialPeer connects to a peer. Use network.WithForceDirectDial to force a
// direct connection.
//
// The idea is that the client of Swarm does not need to know what network
// the connection will happen over. Swarm can use whichever it chooses.
// This allows us to use various transport protocols, do NAT traversal/relay,
// etc. to achieve connection.
func (s *Swarm) DialPeer(ctx context.Context, p peer.ID) (network.Conn, error) {
// Avoid typed nil issues.
c, err := s.dialPeer(ctx, p)
if err != nil {
return nil, err
}
return c, nil
}
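// exampleDialDirect is an illustrative sketch: it forces a direct
// (non-relayed) connection by setting the option that the dial path reads back
// via network.GetForceDirectDial. The function name and reason string are
// placeholders.
func exampleDialDirect(ctx context.Context, s *Swarm, p peer.ID) (network.Conn, error) {
	ctx = network.WithForceDirectDial(ctx, "example: require a direct connection")
	// With this set, relayed addresses are filtered out and an existing
	// relayed connection will not satisfy the dial.
	return s.DialPeer(ctx, p)
}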
// internal dial method that returns an unwrapped conn
//
// It is gated by the swarm's dial synchronization systems: dialsync and
// dialbackoff.
func (s *Swarm) dialPeer(ctx context.Context, p peer.ID) (*Conn, error) {
log.Debugw("dialing peer", "from", s.local, "to", p)
err := p.Validate()
if err != nil {
return nil, err
}
if p == s.local {
return nil, ErrDialToSelf
}
// check if we already have an open (usable) connection.
conn := s.bestAcceptableConnToPeer(ctx, p)
if conn != nil {
return conn, nil
}
if s.gater != nil && !s.gater.InterceptPeerDial(p) {
log.Debugf("gater disallowed outbound connection to peer %s", p)
return nil, &DialError{Peer: p, Cause: ErrGaterDisallowedConnection}
}
// apply the DialPeer timeout
ctx, cancel := context.WithTimeout(ctx, network.GetDialPeerTimeout(ctx))
defer cancel()
conn, err = s.dsync.Dial(ctx, p)
if err == nil {
// Ensure we connected to the correct peer.
// This was most likely already checked by the security protocol, but it doesn't hurt to do it again here.
if conn.RemotePeer() != p {
conn.Close()
log.Errorw("Handshake failed to properly authenticate peer", "authenticated", conn.RemotePeer(), "expected", p)
return nil, fmt.Errorf("unexpected peer")
}
return conn, nil
}
log.Debugf("network for %s finished dialing %s", s.local, p)
if ctx.Err() != nil {
// Context error trumps any dial errors as it was likely the ultimate cause.
return nil, ctx.Err()
}
if s.ctx.Err() != nil {
// Ok, so the swarm is shutting down.
return nil, ErrSwarmClosed
}
return nil, err
}
// dialWorkerLoop synchronizes and executes concurrent dials to a single peer
func (s *Swarm) dialWorkerLoop(p peer.ID, reqch <-chan dialRequest) {
w := newDialWorker(s, p, reqch, nil)
w.loop()
}
func (s *Swarm) addrsForDial(ctx context.Context, p peer.ID) (goodAddrs []ma.Multiaddr, addrErrs []TransportError, err error) {
peerAddrs := s.peers.Addrs(p)
if len(peerAddrs) == 0 {
return nil, nil, ErrNoAddresses
}
// Resolve dns or dnsaddrs
resolved, err := s.resolveAddrs(ctx, peer.AddrInfo{ID: p, Addrs: peerAddrs})
if err != nil {
return nil, nil, err
}
goodAddrs = ma.Unique(resolved)
goodAddrs, addrErrs = s.filterKnownUndialables(p, goodAddrs)
if forceDirect, _ := network.GetForceDirectDial(ctx); forceDirect {
goodAddrs = ma.FilterAddrs(goodAddrs, s.nonProxyAddr)
}
if len(goodAddrs) == 0 {
return nil, addrErrs, ErrNoGoodAddresses
}
s.peers.AddAddrs(p, goodAddrs, peerstore.TempAddrTTL)
return goodAddrs, addrErrs, nil
}
func (s *Swarm) resolveAddrs(ctx context.Context, pi peer.AddrInfo) ([]ma.Multiaddr, error) {
p2paddr, err := ma.NewMultiaddr("/" + ma.ProtocolWithCode(ma.P_P2P).Name + "/" + pi.ID.String())
if err != nil {
return nil, err
}
var resolveSteps int
// Recursively resolve all addrs.
//
// While the toResolve list is non-empty:
// * Pop an address off.
// * If the address is fully resolved, add it to the resolved list.
// * Otherwise, resolve it and add the results to the "to resolve" list.
toResolve := append([]ma.Multiaddr{}, pi.Addrs...)
resolved := make([]ma.Multiaddr, 0, len(pi.Addrs))
for len(toResolve) > 0 {
// pop the last addr off.
addr := toResolve[len(toResolve)-1]
toResolve = toResolve[:len(toResolve)-1]
// if it's resolved, add it to the resolved list.
if !madns.Matches(addr) {
resolved = append(resolved, addr)
continue
}
resolveSteps++
// We've resolved too many addresses. We can keep all the fully
// resolved addresses but we'll need to skip the rest.
if resolveSteps >= maxAddressResolution {
log.Warnf(
"peer %s asked us to resolve too many addresses: %s/%s",
pi.ID,
resolveSteps,
maxAddressResolution,
)
continue
}
tpt := s.TransportForDialing(addr)
resolver, ok := tpt.(transport.Resolver)
if ok {
resolvedAddrs, err := resolver.Resolve(ctx, addr)
if err != nil {
log.Warnf("Failed to resolve multiaddr %s by transport %v: %v", addr, tpt, err)
continue
}
var added bool
for _, a := range resolvedAddrs {
if !addr.Equal(a) {
toResolve = append(toResolve, a)
added = true
}
}
if added {
continue
}
}
// otherwise, resolve it
reqaddr := addr.Encapsulate(p2paddr)
resaddrs, err := s.maResolver.Resolve(ctx, reqaddr)
if err != nil {
log.Infof("error resolving %s: %s", reqaddr, err)
}
// add the results to the toResolve list.
for _, res := range resaddrs {
pi, err := peer.AddrInfoFromP2pAddr(res)
if err != nil {
log.Infof("error parsing %s: %s", res, err)
}
toResolve = append(toResolve, pi.Addrs...)
}
}
return resolved, nil
}
func (s *Swarm) dialNextAddr(ctx context.Context, p peer.ID, addr ma.Multiaddr, resch chan transport.DialUpdate) error {
// check the dial backoff
if forceDirect, _ := network.GetForceDirectDial(ctx); !forceDirect {
if s.backf.Backoff(p, addr) {
return ErrDialBackoff
}
}
// start the dial
s.limitedDial(ctx, p, addr, resch)
return nil
}
func (s *Swarm) nonProxyAddr(addr ma.Multiaddr) bool {
t := s.TransportForDialing(addr)
return !t.Proxy()
}
var quicDraft29DialMatcher = mafmt.And(mafmt.IP, mafmt.Base(ma.P_UDP), mafmt.Base(ma.P_QUIC))
// filterKnownUndialables takes a list of multiaddrs, and removes those
// that we definitely don't want to dial: addresses configured to be blocked,
// IPv6 link-local addresses, addresses without a dial-capable transport,
// addresses that we know to be our own, and addresses with a better transport
// available. This is an optimization to avoid wasting time on dials that we
// know are going to fail or for which we have a better alternative.
func (s *Swarm) filterKnownUndialables(p peer.ID, addrs []ma.Multiaddr) (goodAddrs []ma.Multiaddr, addrErrs []TransportError) {
lisAddrs, _ := s.InterfaceListenAddresses()
var ourAddrs []ma.Multiaddr
for _, addr := range lisAddrs {
// we're only sure about filtering out /ip4 and /ip6 addresses, so far
ma.ForEach(addr, func(c ma.Component) bool {
if c.Protocol().Code == ma.P_IP4 || c.Protocol().Code == ma.P_IP6 {
ourAddrs = append(ourAddrs, addr)
}
return false
})
}
addrErrs = make([]TransportError, 0, len(addrs))
// The order of checking for transport and filtering low priority addrs is important. If we
// can only dial /webtransport, we don't want to filter /webtransport addresses out because
// the peer had a /quic-v1 address
// filter addresses with no transport
addrs = ma.FilterAddrs(addrs, func(a ma.Multiaddr) bool {
if s.TransportForDialing(a) == nil {
e := ErrNoTransport
// We used to support QUIC draft-29 for a long time.
// Provide a more useful error when attempting to dial a QUIC draft-29 address.
if quicDraft29DialMatcher.Matches(a) {
e = ErrQUICDraft29
}
addrErrs = append(addrErrs, TransportError{Address: a, Cause: e})
return false
}
return true
})
// filter low priority addresses among the addresses we can dial
// We don't return an error for these addresses
addrs = filterLowPriorityAddresses(addrs)
// remove black holed addrs
addrs, blackHoledAddrs := s.bhd.FilterAddrs(addrs)
for _, a := range blackHoledAddrs {
addrErrs = append(addrErrs, TransportError{Address: a, Cause: ErrDialRefusedBlackHole})
}
return ma.FilterAddrs(addrs,
// Linux and BSD treat an unspecified address when dialing as a localhost address.
// Windows doesn't support this. We filter all such addresses out because peers
// listening on unspecified addresses will advertise more specific addresses.
// https://unix.stackexchange.com/a/419881
// https://superuser.com/a/1755455
func(addr ma.Multiaddr) bool {
return !manet.IsIPUnspecified(addr)
},
func(addr ma.Multiaddr) bool {
if ma.Contains(ourAddrs, addr) {
addrErrs = append(addrErrs, TransportError{Address: addr, Cause: ErrDialToSelf})
return false
}
return true
},
// TODO: Consider allowing link-local addresses
func(addr ma.Multiaddr) bool { return !manet.IsIP6LinkLocal(addr) },
func(addr ma.Multiaddr) bool {
if s.gater != nil && !s.gater.InterceptAddrDial(p, addr) {
addrErrs = append(addrErrs, TransportError{Address: addr, Cause: ErrGaterDisallowedConnection})
return false
}
return true
},
), addrErrs
}
// limitedDial will start a dial to the given peer when
// it is able, respecting the various different types of rate
// limiting that occur without using extra goroutines per addr
func (s *Swarm) limitedDial(ctx context.Context, p peer.ID, a ma.Multiaddr, resp chan transport.DialUpdate) {
timeout := s.dialTimeout
if manet.IsPrivateAddr(a) && s.dialTimeoutLocal < s.dialTimeout {
timeout = s.dialTimeoutLocal
}
s.limiter.AddDialJob(&dialJob{
addr: a,
peer: p,
resp: resp,
ctx: ctx,
timeout: timeout,
})
}
// dialAddr is the actual dial for an addr, indirectly invoked through the limiter
func (s *Swarm) dialAddr(ctx context.Context, p peer.ID, addr ma.Multiaddr, updCh chan<- transport.DialUpdate) (transport.CapableConn, error) {
// Just to double check. Costs nothing.
if s.local == p {
return nil, ErrDialToSelf
}
// Check before we start work
if err := ctx.Err(); err != nil {
log.Debugf("%s swarm not dialing. Context cancelled: %v. %s %s", s.local, err, p, addr)
return nil, err
}
log.Debugf("%s swarm dialing %s %s", s.local, p, addr)
tpt := s.TransportForDialing(addr)
if tpt == nil {
return nil, ErrNoTransport
}
start := time.Now()
var connC transport.CapableConn
var err error
if du, ok := tpt.(transport.DialUpdater); ok {
connC, err = du.DialWithUpdates(ctx, addr, p, updCh)
} else {
connC, err = tpt.Dial(ctx, addr, p)
}
// We're recording any error as a failure here.
// Notably, this also applies to cancelations (i.e. if another dial attempt was faster).
// This is ok since the black hole detector uses a very low threshold (5%).
s.bhd.RecordResult(addr, err == nil)
if err != nil {
if s.metricsTracer != nil {
s.metricsTracer.FailedDialing(addr, err, context.Cause(ctx))
}
return nil, err
}
canonicallog.LogPeerStatus(100, connC.RemotePeer(), connC.RemoteMultiaddr(), "connection_status", "established", "dir", "outbound")
if s.metricsTracer != nil {
connWithMetrics := wrapWithMetrics(connC, s.metricsTracer, start, network.DirOutbound)
connWithMetrics.completedHandshake()
connC = connWithMetrics
}
// Trust the transport? Yeah... right.
if connC.RemotePeer() != p {
connC.Close()
err = fmt.Errorf("BUG in transport %T: tried to dial %s, dialed %s", p, connC.RemotePeer(), tpt)
log.Error(err)
return nil, err
}
// success! we got one!
return connC, nil
}
// TODO We should have a `IsFdConsuming() bool` method on the `Transport` interface in go-libp2p/core/transport.
// This function checks if any of the transport protocols in the address requires a file descriptor.
// For now:
// A Non-circuit address which has the TCP/UNIX protocol is deemed FD consuming.
// For a circuit-relay address, we look at the address of the relay server/proxy
// and use the same logic as above to decide.
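//
// Illustrative classifications (example addresses):
//
//	/ip4/1.2.3.4/tcp/4001                     -> true  (TCP)
//	/ip4/1.2.3.4/udp/4001/quic-v1             -> false (UDP-based)
//	/ip4/5.6.7.8/tcp/4001/p2p-circuit         -> true  (relay server reached over TCP)
//	/ip4/5.6.7.8/udp/4001/quic-v1/p2p-circuit -> false (relay server reached over QUIC)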
func isFdConsumingAddr(addr ma.Multiaddr) bool {
first, _ := ma.SplitFunc(addr, func(c ma.Component) bool {
return c.Protocol().Code == ma.P_CIRCUIT
})
// for safety
if first == nil {
return true
}
_, err1 := first.ValueForProtocol(ma.P_TCP)
_, err2 := first.ValueForProtocol(ma.P_UNIX)
return err1 == nil || err2 == nil
}
func isRelayAddr(addr ma.Multiaddr) bool {
_, err := addr.ValueForProtocol(ma.P_CIRCUIT)
return err == nil
}
// filterLowPriorityAddresses removes addresses in place for which we have a better alternative
// 1. If a /quic-v1 address is present, filter out /quic and /webtransport addresses on the same 2-tuple:
// QUIC v1 is preferred over the deprecated QUIC draft-29, and given the choice, we prefer using
// raw QUIC over using WebTransport.
// 2. If a /tcp address is present, filter out /ws or /wss addresses on the same 2-tuple:
// We prefer using raw TCP over using WebSocket.
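//
// For example, given these addresses for the same host (illustrative values):
//
//	/ip4/1.2.3.4/udp/4001/quic-v1
//	/ip4/1.2.3.4/udp/4001/quic-v1/webtransport
//	/ip4/1.2.3.4/tcp/4001
//	/ip4/1.2.3.4/tcp/4001/ws
//
// the /webtransport and /ws entries are removed because a /quic-v1 and a /tcp
// address exist on the same (IP, port) 2-tuples.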
func filterLowPriorityAddresses(addrs []ma.Multiaddr) []ma.Multiaddr {
// make a map of QUIC v1 and TCP AddrPorts.
quicV1Addr := make(map[netip.AddrPort]struct{})
tcpAddr := make(map[netip.AddrPort]struct{})
for _, a := range addrs {
switch {
case isProtocolAddr(a, ma.P_WEBTRANSPORT):
case isProtocolAddr(a, ma.P_QUIC_V1):
ap, err := addrPort(a, ma.P_UDP)
if err != nil {
continue
}
quicV1Addr[ap] = struct{}{}
case isProtocolAddr(a, ma.P_WS) || isProtocolAddr(a, ma.P_WSS):
case isProtocolAddr(a, ma.P_TCP):
ap, err := addrPort(a, ma.P_TCP)
if err != nil {
continue
}
tcpAddr[ap] = struct{}{}
}
}
i := 0
for _, a := range addrs {
switch {
case isProtocolAddr(a, ma.P_WEBTRANSPORT) || isProtocolAddr(a, ma.P_QUIC):
ap, err := addrPort(a, ma.P_UDP)
if err != nil {
break
}
if _, ok := quicV1Addr[ap]; ok {
continue
}
case isProtocolAddr(a, ma.P_WS) || isProtocolAddr(a, ma.P_WSS):
ap, err := addrPort(a, ma.P_TCP)
if err != nil {
break
}
if _, ok := tcpAddr[ap]; ok {
continue
}
}
addrs[i] = a
i++
}
return addrs[:i]
}
// addrPort returns the ip and port for a. p should be either ma.P_TCP or ma.P_UDP.
// a must be an (IP, TCP) or (IP, UDP) address.
func addrPort(a ma.Multiaddr, p int) (netip.AddrPort, error) {
ip, err := manet.ToIP(a)
if err != nil {
return netip.AddrPort{}, err
}
port, err := a.ValueForProtocol(p)
if err != nil {
return netip.AddrPort{}, err
}
pi, err := strconv.Atoi(port)
if err != nil {
return netip.AddrPort{}, err
}
addr, ok := netip.AddrFromSlice(ip)
if !ok {
return netip.AddrPort{}, fmt.Errorf("failed to parse IP %s", ip)
}
return netip.AddrPortFrom(addr, uint16(pi)), nil
}


@@ -0,0 +1,168 @@
package swarm
import (
"errors"
"fmt"
"time"
"github.com/libp2p/go-libp2p/core/canonicallog"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/transport"
ma "github.com/multiformats/go-multiaddr"
)
// Listen sets up listeners for all of the given addresses.
// It succeeds as long as we successfully listen on at least *one* address.
func (s *Swarm) Listen(addrs ...ma.Multiaddr) error {
errs := make([]error, len(addrs))
var succeeded int
for i, a := range addrs {
if err := s.AddListenAddr(a); err != nil {
errs[i] = err
} else {
succeeded++
}
}
for i, e := range errs {
if e != nil {
log.Warnw("listening failed", "on", addrs[i], "error", errs[i])
}
}
if succeeded == 0 && len(addrs) > 0 {
return fmt.Errorf("failed to listen on any addresses: %s", errs)
}
return nil
}
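// exampleListen is an illustrative sketch of listening on a TCP and a QUIC
// address; the function name and ports are placeholders, and ma.StringCast is
// used only for brevity (it panics on malformed input).
func exampleListen(s *Swarm) error {
	return s.Listen(
		ma.StringCast("/ip4/0.0.0.0/tcp/4001"),
		ma.StringCast("/ip4/0.0.0.0/udp/4001/quic-v1"),
	)
}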
// ListenClose stops and deletes listeners for all of the given addresses. If
// any address belongs to one of the addresses a Listener provides, then the
// Listener will close for *all* addresses it provides. For example, if you close
// an address with `/quic`, then the QUIC listener will close and also close
// any `/quic-v1` address.
func (s *Swarm) ListenClose(addrs ...ma.Multiaddr) {
listenersToClose := make(map[transport.Listener]struct{}, len(addrs))
s.listeners.Lock()
for l := range s.listeners.m {
if !containsMultiaddr(addrs, l.Multiaddr()) {
continue
}
delete(s.listeners.m, l)
listenersToClose[l] = struct{}{}
}
s.listeners.cacheEOL = time.Time{}
s.listeners.Unlock()
for l := range listenersToClose {
l.Close()
}
}
// AddListenAddr tells the swarm to listen on a single address. Unlike Listen,
// this method does not attempt to filter out bad addresses.
func (s *Swarm) AddListenAddr(a ma.Multiaddr) error {
tpt := s.TransportForListening(a)
if tpt == nil {
// TransportForListening will return nil if either:
// 1. No transport has been registered.
// 2. We're closed (so we've nulled out the transport map).
//
// Distinguish between these two cases to avoid confusing users.
select {
case <-s.ctx.Done():
return ErrSwarmClosed
default:
return ErrNoTransport
}
}
list, err := tpt.Listen(a)
if err != nil {
return err
}
s.listeners.Lock()
if s.listeners.m == nil {
s.listeners.Unlock()
list.Close()
return ErrSwarmClosed
}
s.refs.Add(1)
s.listeners.m[list] = struct{}{}
s.listeners.cacheEOL = time.Time{}
s.listeners.Unlock()
maddr := list.Multiaddr()
// signal to our notifiees on listen.
s.notifyAll(func(n network.Notifiee) {
n.Listen(s, maddr)
})
go func() {
defer func() {
s.listeners.Lock()
_, ok := s.listeners.m[list]
if ok {
delete(s.listeners.m, list)
s.listeners.cacheEOL = time.Time{}
}
s.listeners.Unlock()
if ok {
list.Close()
log.Errorf("swarm listener unintentionally closed")
}
// signal to our notifiees on listen close.
s.notifyAll(func(n network.Notifiee) {
n.ListenClose(s, maddr)
})
s.refs.Done()
}()
for {
c, err := list.Accept()
if err != nil {
if !errors.Is(err, transport.ErrListenerClosed) {
log.Errorf("swarm listener for %s accept error: %s", a, err)
}
return
}
canonicallog.LogPeerStatus(100, c.RemotePeer(), c.RemoteMultiaddr(), "connection_status", "established", "dir", "inbound")
if s.metricsTracer != nil {
c = wrapWithMetrics(c, s.metricsTracer, time.Now(), network.DirInbound)
}
log.Debugf("swarm listener accepted connection: %s <-> %s", c.LocalMultiaddr(), c.RemoteMultiaddr())
s.refs.Add(1)
go func() {
defer s.refs.Done()
_, err := s.addConn(c, network.DirInbound)
switch err {
case nil:
case ErrSwarmClosed:
// ignore.
return
default:
log.Warnw("adding connection failed", "to", a, "error", err)
return
}
}()
}
}()
return nil
}
func containsMultiaddr(addrs []ma.Multiaddr, addr ma.Multiaddr) bool {
for _, a := range addrs {
if addr.Equal(a) {
return true
}
}
return false
}


@@ -0,0 +1,287 @@
package swarm
import (
"context"
"errors"
"net"
"strings"
"time"
"github.com/libp2p/go-libp2p/core/crypto"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/p2p/metricshelper"
ma "github.com/multiformats/go-multiaddr"
"github.com/prometheus/client_golang/prometheus"
)
const metricNamespace = "libp2p_swarm"
var (
connsOpened = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: "connections_opened_total",
Help: "Connections Opened",
},
[]string{"dir", "transport", "security", "muxer", "early_muxer", "ip_version"},
)
keyTypes = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: "key_types_total",
Help: "key type",
},
[]string{"dir", "key_type"},
)
connsClosed = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: "connections_closed_total",
Help: "Connections Closed",
},
[]string{"dir", "transport", "security", "muxer", "early_muxer", "ip_version"},
)
dialError = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: "dial_errors_total",
Help: "Dial Error",
},
[]string{"transport", "error", "ip_version"},
)
connDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: metricNamespace,
Name: "connection_duration_seconds",
Help: "Duration of a Connection",
Buckets: prometheus.ExponentialBuckets(1.0/16, 2, 25), // up to 24 days
},
[]string{"dir", "transport", "security", "muxer", "early_muxer", "ip_version"},
)
connHandshakeLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: metricNamespace,
Name: "handshake_latency_seconds",
Help: "Duration of the libp2p Handshake",
Buckets: prometheus.ExponentialBuckets(0.001, 1.3, 35),
},
[]string{"transport", "security", "muxer", "early_muxer", "ip_version"},
)
dialsPerPeer = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricNamespace,
Name: "dials_per_peer_total",
Help: "Number of addresses dialed per peer",
},
[]string{"outcome", "num_dials"},
)
dialRankingDelay = prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: metricNamespace,
Name: "dial_ranking_delay_seconds",
Help: "delay introduced by the dial ranking logic",
Buckets: []float64{0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.75, 1, 2},
},
)
blackHoleFilterState = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: "black_hole_filter_state",
Help: "State of the black hole filter",
},
[]string{"name"},
)
blackHoleFilterSuccessFraction = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: "black_hole_filter_success_fraction",
Help: "Fraction of successful dials among the last n requests",
},
[]string{"name"},
)
blackHoleFilterNextRequestAllowedAfter = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: "black_hole_filter_next_request_allowed_after",
Help: "Number of requests after which the next request will be allowed",
},
[]string{"name"},
)
collectors = []prometheus.Collector{
connsOpened,
keyTypes,
connsClosed,
dialError,
connDuration,
connHandshakeLatency,
dialsPerPeer,
dialRankingDelay,
blackHoleFilterSuccessFraction,
blackHoleFilterState,
blackHoleFilterNextRequestAllowedAfter,
}
)
type MetricsTracer interface {
OpenedConnection(network.Direction, crypto.PubKey, network.ConnectionState, ma.Multiaddr)
ClosedConnection(network.Direction, time.Duration, network.ConnectionState, ma.Multiaddr)
CompletedHandshake(time.Duration, network.ConnectionState, ma.Multiaddr)
FailedDialing(ma.Multiaddr, error, error)
DialCompleted(success bool, totalDials int)
DialRankingDelay(d time.Duration)
UpdatedBlackHoleFilterState(name string, state blackHoleState, nextProbeAfter int, successFraction float64)
}
type metricsTracer struct{}
var _ MetricsTracer = &metricsTracer{}
type metricsTracerSetting struct {
reg prometheus.Registerer
}
type MetricsTracerOption func(*metricsTracerSetting)
func WithRegisterer(reg prometheus.Registerer) MetricsTracerOption {
return func(s *metricsTracerSetting) {
if reg != nil {
s.reg = reg
}
}
}
func NewMetricsTracer(opts ...MetricsTracerOption) MetricsTracer {
setting := &metricsTracerSetting{reg: prometheus.DefaultRegisterer}
for _, opt := range opts {
opt(setting)
}
metricshelper.RegisterCollectors(setting.reg, collectors...)
return &metricsTracer{}
}
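// exampleMetricsTracer is an illustrative sketch: it registers the swarm
// collectors on a private prometheus registry instead of the default one. The
// function name is a placeholder.
func exampleMetricsTracer() MetricsTracer {
	reg := prometheus.NewRegistry()
	return NewMetricsTracer(WithRegisterer(reg))
}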
func appendConnectionState(tags []string, cs network.ConnectionState) []string {
if cs.Transport == "" {
// This shouldn't happen, unless the transport doesn't properly set the Transport field in the ConnectionState.
tags = append(tags, "unknown")
} else {
tags = append(tags, string(cs.Transport))
}
// These might be empty, depending on the transport.
// For example, QUIC doesn't set security nor muxer.
tags = append(tags, string(cs.Security))
tags = append(tags, string(cs.StreamMultiplexer))
earlyMuxer := "false"
if cs.UsedEarlyMuxerNegotiation {
earlyMuxer = "true"
}
tags = append(tags, earlyMuxer)
return tags
}
func (m *metricsTracer) OpenedConnection(dir network.Direction, p crypto.PubKey, cs network.ConnectionState, laddr ma.Multiaddr) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
*tags = append(*tags, metricshelper.GetDirection(dir))
*tags = appendConnectionState(*tags, cs)
*tags = append(*tags, metricshelper.GetIPVersion(laddr))
connsOpened.WithLabelValues(*tags...).Inc()
*tags = (*tags)[:0]
*tags = append(*tags, metricshelper.GetDirection(dir))
*tags = append(*tags, p.Type().String())
keyTypes.WithLabelValues(*tags...).Inc()
}
func (m *metricsTracer) ClosedConnection(dir network.Direction, duration time.Duration, cs network.ConnectionState, laddr ma.Multiaddr) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
*tags = append(*tags, metricshelper.GetDirection(dir))
*tags = appendConnectionState(*tags, cs)
*tags = append(*tags, metricshelper.GetIPVersion(laddr))
connsClosed.WithLabelValues(*tags...).Inc()
connDuration.WithLabelValues(*tags...).Observe(duration.Seconds())
}
func (m *metricsTracer) CompletedHandshake(t time.Duration, cs network.ConnectionState, laddr ma.Multiaddr) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
*tags = appendConnectionState(*tags, cs)
*tags = append(*tags, metricshelper.GetIPVersion(laddr))
connHandshakeLatency.WithLabelValues(*tags...).Observe(t.Seconds())
}
func (m *metricsTracer) FailedDialing(addr ma.Multiaddr, dialErr error, cause error) {
transport := metricshelper.GetTransport(addr)
e := "other"
// dial deadline exceeded or the parent context's deadline exceeded
if errors.Is(dialErr, context.DeadlineExceeded) || errors.Is(cause, context.DeadlineExceeded) {
e = "deadline"
} else if errors.Is(dialErr, context.Canceled) {
// dial was cancelled.
if errors.Is(cause, context.Canceled) {
// parent context was canceled
e = "application canceled"
} else if errors.Is(cause, errConcurrentDialSuccessful) {
e = "canceled: concurrent dial successful"
} else {
// something else
e = "canceled: other"
}
} else {
nerr, ok := dialErr.(net.Error)
if ok && nerr.Timeout() {
e = "timeout"
} else if strings.Contains(dialErr.Error(), "connect: connection refused") {
e = "connection refused"
}
}
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
*tags = append(*tags, transport, e)
*tags = append(*tags, metricshelper.GetIPVersion(addr))
dialError.WithLabelValues(*tags...).Inc()
}
func (m *metricsTracer) DialCompleted(success bool, totalDials int) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
if success {
*tags = append(*tags, "success")
} else {
*tags = append(*tags, "failed")
}
numDialLabels := [...]string{"0", "1", "2", "3", "4", "5", ">=6"}
var numDials string
if totalDials < len(numDialLabels) {
numDials = numDialLabels[totalDials]
} else {
numDials = numDialLabels[len(numDialLabels)-1]
}
*tags = append(*tags, numDials)
dialsPerPeer.WithLabelValues(*tags...).Inc()
}
func (m *metricsTracer) DialRankingDelay(d time.Duration) {
dialRankingDelay.Observe(d.Seconds())
}
func (m *metricsTracer) UpdatedBlackHoleFilterState(name string, state blackHoleState,
nextProbeAfter int, successFraction float64) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
*tags = append(*tags, name)
blackHoleFilterState.WithLabelValues(*tags...).Set(float64(state))
blackHoleFilterSuccessFraction.WithLabelValues(*tags...).Set(successFraction)
blackHoleFilterNextRequestAllowedAfter.WithLabelValues(*tags...).Set(float64(nextProbeAfter))
}


@@ -0,0 +1,179 @@
package swarm
import (
"fmt"
"sync"
"sync/atomic"
"time"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/protocol"
)
// Validate Stream conforms to the go-libp2p-net Stream interface
var _ network.Stream = &Stream{}
// Stream is the stream type used by swarm. In general, you won't use this type
// directly.
type Stream struct {
id uint64
stream network.MuxedStream
conn *Conn
scope network.StreamManagementScope
closeMx sync.Mutex
isClosed bool
// acceptStreamGoroutineCompleted indicates whether the goroutine handling the incoming stream has exited
acceptStreamGoroutineCompleted bool
protocol atomic.Pointer[protocol.ID]
stat network.Stats
}
func (s *Stream) ID() string {
// format: <first 10 chars of peer id>-<global conn ordinal>-<global stream ordinal>
return fmt.Sprintf("%s-%d", s.conn.ID(), s.id)
}
func (s *Stream) String() string {
return fmt.Sprintf(
"<swarm.Stream[%s] %s (%s) <-> %s (%s)>",
s.conn.conn.Transport(),
s.conn.LocalMultiaddr(),
s.conn.LocalPeer(),
s.conn.RemoteMultiaddr(),
s.conn.RemotePeer(),
)
}
// Conn returns the Conn associated with this stream, as an network.Conn
func (s *Stream) Conn() network.Conn {
return s.conn
}
// Read reads bytes from a stream.
func (s *Stream) Read(p []byte) (int, error) {
n, err := s.stream.Read(p)
// TODO: push this down to a lower level for better accuracy.
if s.conn.swarm.bwc != nil {
s.conn.swarm.bwc.LogRecvMessage(int64(n))
s.conn.swarm.bwc.LogRecvMessageStream(int64(n), s.Protocol(), s.Conn().RemotePeer())
}
return n, err
}
// Write writes bytes to a stream, flushing for each call.
func (s *Stream) Write(p []byte) (int, error) {
n, err := s.stream.Write(p)
// TODO: push this down to a lower level for better accuracy.
if s.conn.swarm.bwc != nil {
s.conn.swarm.bwc.LogSentMessage(int64(n))
s.conn.swarm.bwc.LogSentMessageStream(int64(n), s.Protocol(), s.Conn().RemotePeer())
}
return n, err
}
// Close closes the stream, closing both ends and freeing all associated
// resources.
func (s *Stream) Close() error {
err := s.stream.Close()
s.closeAndRemoveStream()
return err
}
// Reset resets the stream, signaling an error on both ends and freeing all
// associated resources.
func (s *Stream) Reset() error {
err := s.stream.Reset()
s.closeAndRemoveStream()
return err
}
func (s *Stream) closeAndRemoveStream() {
s.closeMx.Lock()
defer s.closeMx.Unlock()
if s.isClosed {
return
}
s.isClosed = true
// We don't want to keep swarm from closing till the stream handler has exited
s.conn.swarm.refs.Done()
// Cleanup the stream from connection only after the stream handler has completed
if s.acceptStreamGoroutineCompleted {
s.conn.removeStream(s)
}
}
// CloseWrite closes the stream for writing, flushing all data and sending an EOF.
// This function does not free resources, call Close or Reset when done with the
// stream.
func (s *Stream) CloseWrite() error {
return s.stream.CloseWrite()
}
// CloseRead closes the stream for reading. This function does not free resources,
// call Close or Reset when done with the stream.
func (s *Stream) CloseRead() error {
return s.stream.CloseRead()
}
func (s *Stream) completeAcceptStreamGoroutine() {
s.closeMx.Lock()
defer s.closeMx.Unlock()
if s.acceptStreamGoroutineCompleted {
return
}
s.acceptStreamGoroutineCompleted = true
if s.isClosed {
s.conn.removeStream(s)
}
}
// Protocol returns the protocol negotiated on this stream (if set).
func (s *Stream) Protocol() protocol.ID {
p := s.protocol.Load()
if p == nil {
return ""
}
return *p
}
// SetProtocol sets the protocol for this stream.
//
// This doesn't actually *do* anything other than record the fact that we're
// speaking the given protocol over this stream. It's still up to the user to
// negotiate the protocol. This is usually done by the Host.
func (s *Stream) SetProtocol(p protocol.ID) error {
if err := s.scope.SetProtocol(p); err != nil {
return err
}
s.protocol.Store(&p)
return nil
}
// SetDeadline sets the read and write deadlines for this stream.
func (s *Stream) SetDeadline(t time.Time) error {
return s.stream.SetDeadline(t)
}
// SetReadDeadline sets the read deadline for this stream.
func (s *Stream) SetReadDeadline(t time.Time) error {
return s.stream.SetReadDeadline(t)
}
// SetWriteDeadline sets the write deadline for this stream.
func (s *Stream) SetWriteDeadline(t time.Time) error {
return s.stream.SetWriteDeadline(t)
}
// Stat returns metadata information for this stream.
func (s *Stream) Stat() network.Stats {
return s.stat
}
func (s *Stream) Scope() network.StreamScope {
return s.scope
}


@@ -0,0 +1,109 @@
package swarm
import (
"fmt"
"strings"
"github.com/libp2p/go-libp2p/core/transport"
ma "github.com/multiformats/go-multiaddr"
)
// TransportForDialing retrieves the appropriate transport for dialing the given
// multiaddr.
func (s *Swarm) TransportForDialing(a ma.Multiaddr) transport.Transport {
protocols := a.Protocols()
if len(protocols) == 0 {
return nil
}
s.transports.RLock()
defer s.transports.RUnlock()
if len(s.transports.m) == 0 {
// make sure we're not just shutting down.
if s.transports.m != nil {
log.Error("you have no transports configured")
}
return nil
}
if isRelayAddr(a) {
return s.transports.m[ma.P_CIRCUIT]
}
for _, t := range s.transports.m {
if t.CanDial(a) {
return t
}
}
return nil
}
// TransportForListening retrieves the appropriate transport for listening on
// the given multiaddr.
func (s *Swarm) TransportForListening(a ma.Multiaddr) transport.Transport {
protocols := a.Protocols()
if len(protocols) == 0 {
return nil
}
s.transports.RLock()
defer s.transports.RUnlock()
if len(s.transports.m) == 0 {
// make sure we're not just shutting down.
if s.transports.m != nil {
log.Error("you have no transports configured")
}
return nil
}
selected := s.transports.m[protocols[len(protocols)-1].Code]
for _, p := range protocols {
transport, ok := s.transports.m[p.Code]
if !ok {
continue
}
if transport.Proxy() {
selected = transport
}
}
return selected
}
// AddTransport adds a transport to this swarm.
//
// Satisfies the Network interface from go-libp2p-transport.
func (s *Swarm) AddTransport(t transport.Transport) error {
protocols := t.Protocols()
if len(protocols) == 0 {
return fmt.Errorf("useless transport handles no protocols: %T", t)
}
s.transports.Lock()
defer s.transports.Unlock()
if s.transports.m == nil {
return ErrSwarmClosed
}
var registered []string
for _, p := range protocols {
if _, ok := s.transports.m[p]; ok {
proto := ma.ProtocolWithCode(p)
name := proto.Name
if name == "" {
name = fmt.Sprintf("unknown (%d)", p)
}
registered = append(registered, name)
}
}
if len(registered) > 0 {
return fmt.Errorf(
"transports already registered for protocol(s): %s",
strings.Join(registered, ", "),
)
}
for _, p := range protocols {
s.transports.m[p] = t
}
return nil
}
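// exampleAddTransport is an illustrative sketch: it registers an
// already-constructed transport with the swarm. Constructing a concrete
// transport (TCP, QUIC, ...) is out of scope here, so it is taken as a
// parameter; the function name is a placeholder.
func exampleAddTransport(s *Swarm, t transport.Transport) error {
	// Registers t for every protocol code it reports via Protocols() and
	// fails if another transport already claims one of those codes.
	return s.AddTransport(t)
}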