Refine discovery to trust mesh multiaddrs

This commit is contained in:
Claude Code
2025-10-17 09:13:54 +11:00
parent 3373f7b462
commit e2847a64ba
2 changed files with 51 additions and 5 deletions

5
TODO.md Normal file
View File

@@ -0,0 +1,5 @@
# TODO
1. Disable or refactor `SwarmDiscovery` so it consumes libp2p metadata rather than Docker task attachments (use `DISCOVERY_METHOD=dns` as the fallback until parity is reached).
2. Shift council opportunity delivery to prefer mesh/NATS using the freshly stored `Agent.P2PAddr`, leaving HTTP as a compatibility fallback.
3. Emit telemetry/alerts whenever an agent reports zero `multiaddrs` so operations can catch misconfigured peers before discovery regresses.

View File

@@ -12,6 +12,7 @@ import (
"sync"
"time"
"github.com/multiformats/go-multiaddr"
"github.com/rs/zerolog/log"
)
@@ -485,6 +486,7 @@ func (d *Discovery) processServiceResponse(endpoint string, resp *http.Response)
Model string `json:"model"`
PeerID string `json:"peer_id"`
Metadata map[string]interface{} `json:"metadata"`
Multiaddrs []string `json:"multiaddrs"`
}
if err := json.NewDecoder(resp.Body).Decode(&agentInfo); err != nil {
@@ -494,14 +496,52 @@ func (d *Discovery) processServiceResponse(endpoint string, resp *http.Response)
}
apiEndpoint, host := normalizeAPIEndpoint(endpoint)
resolvedHTTPHost := ""
resolvedP2PAddr := ""
for _, addr := range agentInfo.Multiaddrs {
ma, err := multiaddr.NewMultiaddr(addr)
if err != nil {
log.Debug().Str("multiaddr", addr).Err(err).Msg("Skipping invalid multiaddr from agent health response")
continue
}
if resolvedP2PAddr == "" {
resolvedP2PAddr = addr
}
if resolvedHTTPHost != "" {
continue
}
if ipV4, err := ma.ValueForProtocol(multiaddr.P_IP4); err == nil && ipV4 != "" {
resolvedHTTPHost = ipV4
continue
}
if ipV6, err := ma.ValueForProtocol(multiaddr.P_IP6); err == nil && ipV6 != "" {
resolvedHTTPHost = fmt.Sprintf("[%s]", ipV6)
}
}
if resolvedHTTPHost == "" {
resolvedHTTPHost = host
}
p2pAddr := endpoint
if host != "" {
if resolvedP2PAddr != "" {
p2pAddr = resolvedP2PAddr
} else if agentInfo.PeerID != "" && resolvedHTTPHost != "" {
// Fall back to synthesised multiaddr using discovered host + known p2p port
p2pAddr = fmt.Sprintf("/ip4/%s/tcp/9000/p2p/%s", strings.Trim(resolvedHTTPHost, "[]"), agentInfo.PeerID)
} else if host != "" {
p2pAddr = fmt.Sprintf("%s:%d", host, 9000)
}
// Build multiaddr from peer_id if available
if agentInfo.PeerID != "" && host != "" {
p2pAddr = fmt.Sprintf("/ip4/%s/tcp/9000/p2p/%s", host, agentInfo.PeerID)
// Prefer resolved IP for HTTP endpoint when available to avoid Swarm VIP routing issues
if resolvedHTTPHost != "" {
apiEndpoint = fmt.Sprintf("http://%s:%d", resolvedHTTPHost, d.config.AgentPort)
}
// Create detailed agent from parsed info
@@ -546,7 +586,8 @@ func (d *Discovery) processServiceResponse(endpoint string, resp *http.Response)
log.Info().
Str("agent_id", agent.ID).
Str("peer_id", agent.PeerID).
Str("endpoint", endpoint).
Str("endpoint", apiEndpoint).
Str("p2p_addr", p2pAddr).
Msg("🤖 Discovered CHORUS agent with metadata")
}