diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..1c998b2 --- /dev/null +++ b/TODO.md @@ -0,0 +1,5 @@ +# TODO + +1. Refactor `SwarmDiscovery` so it consumes libp2p metadata rather than Docker task attachments, or disable it and fall back to `DISCOVERY_METHOD=dns` until parity is reached. +2. Shift council opportunity delivery to prefer mesh/NATS using the freshly stored `Agent.P2PAddr`, leaving HTTP as a compatibility fallback. +3. Emit telemetry/alerts whenever an agent reports zero `multiaddrs` so operations can catch misconfigured peers before discovery regresses. diff --git a/internal/p2p/discovery.go b/internal/p2p/discovery.go index e16183f..9a08cfa 100644 --- a/internal/p2p/discovery.go +++ b/internal/p2p/discovery.go @@ -12,6 +12,7 @@ import ( "sync" "time" + "github.com/multiformats/go-multiaddr" "github.com/rs/zerolog/log" ) @@ -485,6 +486,7 @@ func (d *Discovery) processServiceResponse(endpoint string, resp *http.Response) Model string `json:"model"` PeerID string `json:"peer_id"` Metadata map[string]interface{} `json:"metadata"` + Multiaddrs []string `json:"multiaddrs"` } if err := json.NewDecoder(resp.Body).Decode(&agentInfo); err != nil { @@ -494,14 +496,52 @@ func (d *Discovery) processServiceResponse(endpoint string, resp *http.Response) } apiEndpoint, host := normalizeAPIEndpoint(endpoint) + + resolvedHTTPHost := "" + resolvedP2PAddr := "" + + for _, addr := range agentInfo.Multiaddrs { + ma, err := multiaddr.NewMultiaddr(addr) + if err != nil { + log.Debug().Str("multiaddr", addr).Err(err).Msg("Skipping invalid multiaddr from agent health response") + continue + } + + if resolvedP2PAddr == "" { + resolvedP2PAddr = addr + } + + if resolvedHTTPHost != "" { + continue + } + + if ipV4, err := ma.ValueForProtocol(multiaddr.P_IP4); err == nil && ipV4 != "" { + resolvedHTTPHost = ipV4 + continue + } + + if ipV6, err := ma.ValueForProtocol(multiaddr.P_IP6); err == nil && ipV6 != "" { + resolvedHTTPHost = fmt.Sprintf("[%s]", ipV6) + } + } + + if 
resolvedHTTPHost == "" { + resolvedHTTPHost = host + } + p2pAddr := endpoint - if host != "" { + if resolvedP2PAddr != "" { + p2pAddr = resolvedP2PAddr + } else if agentInfo.PeerID != "" && resolvedHTTPHost != "" { + // Fall back to synthesised multiaddr using discovered host + known p2p port + p2pAddr = fmt.Sprintf("/ip4/%s/tcp/9000/p2p/%s", strings.Trim(resolvedHTTPHost, "[]"), agentInfo.PeerID) + } else if host != "" { p2pAddr = fmt.Sprintf("%s:%d", host, 9000) } - // Build multiaddr from peer_id if available - if agentInfo.PeerID != "" && host != "" { - p2pAddr = fmt.Sprintf("/ip4/%s/tcp/9000/p2p/%s", host, agentInfo.PeerID) + // Prefer resolved IP for HTTP endpoint when available to avoid Swarm VIP routing issues + if resolvedHTTPHost != "" { + apiEndpoint = fmt.Sprintf("http://%s:%d", resolvedHTTPHost, d.config.AgentPort) } // Create detailed agent from parsed info @@ -546,7 +586,8 @@ func (d *Discovery) processServiceResponse(endpoint string, resp *http.Response) log.Info(). Str("agent_id", agent.ID). Str("peer_id", agent.PeerID). - Str("endpoint", endpoint). + Str("endpoint", apiEndpoint). + Str("p2p_addr", p2pAddr). Msg("🤖 Discovered CHORUS agent with metadata") }