Fix Docker Swarm discovery network name mismatch

- Changed NetworkName from 'chorus_default' to 'chorus_net' - This matches the actual network 'CHORUS_chorus_net' (service prefix added automatically) - Fixes discovered_count:0 issue - now successfully discovering all 25 agents - Updated IMPLEMENTATION-SUMMARY with deployment status Result: All 25 CHORUS agents now discovered successfully via Docker Swarm API 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-10 10:35:25 +11:00
parent 2826b28645
commit 9aeaa433fc
36 changed files with 4721 additions and 2213 deletions
--- a/internal/licensing/enterprise_validator.go
+++ b/internal/licensing/enterprise_validator.go
@@ -0,0 +1,363 @@
+package licensing
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/rs/zerolog/log"
+)
+
+// EnterpriseValidator handles validation of enterprise licenses via KACHING
+type EnterpriseValidator struct {
+	kachingEndpoint string
+	client          *http.Client
+	cache           *LicenseCache
+}
+
+// LicenseFeatures represents the features available in a license
+type LicenseFeatures struct {
+	SpecKitMethodology bool                   `json:"spec_kit_methodology"`
+	CustomTemplates    bool                   `json:"custom_templates"`
+	AdvancedAnalytics  bool                   `json:"advanced_analytics"`
+	WorkflowQuota      int                    `json:"workflow_quota"`
+	PrioritySupport    bool                   `json:"priority_support"`
+	Additional         map[string]interface{} `json:"additional,omitempty"`
+}
+
+// LicenseInfo contains validated license information
+type LicenseInfo struct {
+	LicenseID      uuid.UUID        `json:"license_id"`
+	OrgID          uuid.UUID        `json:"org_id"`
+	DeploymentID   uuid.UUID        `json:"deployment_id"`
+	PlanID         string           `json:"plan_id"` // community, professional, enterprise
+	Features       LicenseFeatures  `json:"features"`
+	ValidFrom      time.Time        `json:"valid_from"`
+	ValidTo        time.Time        `json:"valid_to"`
+	SeatsLimit     *int             `json:"seats_limit,omitempty"`
+	NodesLimit     *int             `json:"nodes_limit,omitempty"`
+	IsValid        bool             `json:"is_valid"`
+	ValidationTime time.Time        `json:"validation_time"`
+}
+
+// ValidationRequest sent to KACHING for license validation
+type ValidationRequest struct {
+	DeploymentID uuid.UUID `json:"deployment_id"`
+	Feature      string    `json:"feature"` // e.g., "spec_kit_methodology"
+	Context      Context   `json:"context"`
+}
+
+// Context provides additional information for license validation
+type Context struct {
+	ProjectID   string `json:"project_id,omitempty"`
+	IssueID     string `json:"issue_id,omitempty"`
+	CouncilID   string `json:"council_id,omitempty"`
+	RequestedBy string `json:"requested_by,omitempty"`
+}
+
+// ValidationResponse from KACHING
+type ValidationResponse struct {
+	Valid        bool          `json:"valid"`
+	License      *LicenseInfo  `json:"license,omitempty"`
+	Reason       string        `json:"reason,omitempty"`
+	UsageInfo    *UsageInfo    `json:"usage_info,omitempty"`
+	Suggestions  []Suggestion  `json:"suggestions,omitempty"`
+}
+
+// UsageInfo provides current usage statistics
+type UsageInfo struct {
+	CurrentMonth struct {
+		SpecKitWorkflows int `json:"spec_kit_workflows"`
+		Quota            int `json:"quota"`
+		Remaining        int `json:"remaining"`
+	} `json:"current_month"`
+	PreviousMonth struct {
+		SpecKitWorkflows int `json:"spec_kit_workflows"`
+	} `json:"previous_month"`
+}
+
+// Suggestion for license upgrades
+type Suggestion struct {
+	Type        string            `json:"type"`        // upgrade_tier, enable_feature
+	Title       string            `json:"title"`
+	Description string            `json:"description"`
+	TargetPlan  string            `json:"target_plan,omitempty"`
+	Benefits    map[string]string `json:"benefits,omitempty"`
+}
+
+// NewEnterpriseValidator creates a new enterprise license validator
+func NewEnterpriseValidator(kachingEndpoint string) *EnterpriseValidator {
+	return &EnterpriseValidator{
+		kachingEndpoint: kachingEndpoint,
+		client: &http.Client{
+			Timeout: 10 * time.Second,
+		},
+		cache: NewLicenseCache(5 * time.Minute), // 5-minute cache TTL
+	}
+}
+
+// ValidateSpecKitAccess validates if a deployment has access to spec-kit features
+func (v *EnterpriseValidator) ValidateSpecKitAccess(
+	ctx context.Context,
+	deploymentID uuid.UUID,
+	context Context,
+) (*ValidationResponse, error) {
+	startTime := time.Now()
+
+	log.Info().
+		Str("deployment_id", deploymentID.String()).
+		Str("feature", "spec_kit_methodology").
+		Msg("Validating spec-kit access")
+
+	// Check cache first
+	if cached := v.cache.Get(deploymentID, "spec_kit_methodology"); cached != nil {
+		log.Debug().
+			Str("deployment_id", deploymentID.String()).
+			Msg("Using cached license validation")
+		return cached, nil
+	}
+
+	// Prepare validation request
+	request := ValidationRequest{
+		DeploymentID: deploymentID,
+		Feature:      "spec_kit_methodology",
+		Context:      context,
+	}
+
+	response, err := v.callKachingValidation(ctx, request)
+	if err != nil {
+		log.Error().
+			Err(err).
+			Str("deployment_id", deploymentID.String()).
+			Msg("Failed to validate license with KACHING")
+		return nil, fmt.Errorf("license validation failed: %w", err)
+	}
+
+	// Cache successful responses
+	if response.Valid {
+		v.cache.Set(deploymentID, "spec_kit_methodology", response)
+	}
+
+	duration := time.Since(startTime).Milliseconds()
+	log.Info().
+		Str("deployment_id", deploymentID.String()).
+		Bool("valid", response.Valid).
+		Int64("duration_ms", duration).
+		Msg("License validation completed")
+
+	return response, nil
+}
+
+// ValidateWorkflowQuota checks if deployment has remaining spec-kit workflow quota
+func (v *EnterpriseValidator) ValidateWorkflowQuota(
+	ctx context.Context,
+	deploymentID uuid.UUID,
+	context Context,
+) (*ValidationResponse, error) {
+	// First validate basic access
+	response, err := v.ValidateSpecKitAccess(ctx, deploymentID, context)
+	if err != nil {
+		return nil, err
+	}
+
+	if !response.Valid {
+		return response, nil
+	}
+
+	// Check quota specifically
+	if response.UsageInfo != nil {
+		remaining := response.UsageInfo.CurrentMonth.Remaining
+		if remaining <= 0 {
+			response.Valid = false
+			response.Reason = "Monthly spec-kit workflow quota exceeded"
+
+			// Add upgrade suggestion if quota exceeded
+			if response.License != nil && response.License.PlanID == "professional" {
+				response.Suggestions = append(response.Suggestions, Suggestion{
+					Type:        "upgrade_tier",
+					Title:       "Upgrade to Enterprise",
+					Description: "Get unlimited spec-kit workflows with Enterprise tier",
+					TargetPlan:  "enterprise",
+					Benefits: map[string]string{
+						"workflows": "Unlimited spec-kit workflows",
+						"templates": "Custom template library access",
+						"support":   "24/7 priority support",
+					},
+				})
+			}
+		}
+	}
+
+	return response, nil
+}
+
+// GetLicenseInfo retrieves complete license information for a deployment
+func (v *EnterpriseValidator) GetLicenseInfo(
+	ctx context.Context,
+	deploymentID uuid.UUID,
+) (*LicenseInfo, error) {
+	response, err := v.ValidateSpecKitAccess(ctx, deploymentID, Context{})
+	if err != nil {
+		return nil, err
+	}
+
+	return response.License, nil
+}
+
+// IsEnterpriseFeatureEnabled checks if a specific enterprise feature is enabled
+func (v *EnterpriseValidator) IsEnterpriseFeatureEnabled(
+	ctx context.Context,
+	deploymentID uuid.UUID,
+	feature string,
+) (bool, error) {
+	request := ValidationRequest{
+		DeploymentID: deploymentID,
+		Feature:      feature,
+		Context:      Context{},
+	}
+
+	response, err := v.callKachingValidation(ctx, request)
+	if err != nil {
+		return false, err
+	}
+
+	return response.Valid, nil
+}
+
+// callKachingValidation makes HTTP request to KACHING validation endpoint
+func (v *EnterpriseValidator) callKachingValidation(
+	ctx context.Context,
+	request ValidationRequest,
+) (*ValidationResponse, error) {
+	// Prepare HTTP request
+	requestBody, err := json.Marshal(request)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal request: %w", err)
+	}
+
+	url := fmt.Sprintf("%s/v1/license/validate", v.kachingEndpoint)
+	req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(requestBody))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("User-Agent", "WHOOSH/1.0")
+
+	// Make request
+	resp, err := v.client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("request failed: %w", err)
+	}
+	defer resp.Body.Close()
+
+	// Handle different response codes
+	switch resp.StatusCode {
+	case http.StatusOK:
+		var response ValidationResponse
+		if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
+			return nil, fmt.Errorf("failed to decode response: %w", err)
+		}
+		return &response, nil
+
+	case http.StatusUnauthorized:
+		return &ValidationResponse{
+			Valid:  false,
+			Reason: "Invalid or expired license",
+		}, nil
+
+	case http.StatusTooManyRequests:
+		return &ValidationResponse{
+			Valid:  false,
+			Reason: "Rate limit exceeded",
+		}, nil
+
+	case http.StatusServiceUnavailable:
+		// KACHING service unavailable - fallback to cached or basic validation
+		log.Warn().
+			Str("deployment_id", request.DeploymentID.String()).
+			Msg("KACHING service unavailable, falling back to basic validation")
+
+		return v.fallbackValidation(request.DeploymentID)
+
+	default:
+		return nil, fmt.Errorf("unexpected response status: %d", resp.StatusCode)
+	}
+}
+
+// fallbackValidation provides basic validation when KACHING is unavailable
+func (v *EnterpriseValidator) fallbackValidation(deploymentID uuid.UUID) (*ValidationResponse, error) {
+	// Check cache for any recent validation
+	if cached := v.cache.Get(deploymentID, "spec_kit_methodology"); cached != nil {
+		log.Info().
+			Str("deployment_id", deploymentID.String()).
+			Msg("Using cached license data for fallback validation")
+		return cached, nil
+	}
+
+	// Default to basic access for community features
+	return &ValidationResponse{
+		Valid:  false, // Spec-kit is enterprise only
+		Reason: "License service unavailable - spec-kit requires enterprise license",
+		Suggestions: []Suggestion{
+			{
+				Type:        "contact_support",
+				Title:       "Contact Support",
+				Description: "License service is temporarily unavailable. Contact support for assistance.",
+			},
+		},
+	}, nil
+}
+
+// TrackWorkflowUsage reports spec-kit workflow usage to KACHING for billing
+func (v *EnterpriseValidator) TrackWorkflowUsage(
+	ctx context.Context,
+	deploymentID uuid.UUID,
+	workflowType string,
+	metadata map[string]interface{},
+) error {
+	usageEvent := map[string]interface{}{
+		"deployment_id": deploymentID,
+		"event_type":    "spec_kit_workflow_executed",
+		"workflow_type": workflowType,
+		"timestamp":     time.Now().UTC(),
+		"metadata":      metadata,
+	}
+
+	eventData, err := json.Marshal(usageEvent)
+	if err != nil {
+		return fmt.Errorf("failed to marshal usage event: %w", err)
+	}
+
+	url := fmt.Sprintf("%s/v1/usage/track", v.kachingEndpoint)
+	req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(eventData))
+	if err != nil {
+		return fmt.Errorf("failed to create usage tracking request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := v.client.Do(req)
+	if err != nil {
+		// Log error but don't fail the workflow for usage tracking issues
+		log.Error().
+			Err(err).
+			Str("deployment_id", deploymentID.String()).
+			Str("workflow_type", workflowType).
+			Msg("Failed to track workflow usage")
+		return nil
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode >= 400 {
+		log.Error().
+			Int("status_code", resp.StatusCode).
+			Str("deployment_id", deploymentID.String()).
+			Msg("Usage tracking request failed")
+	}
+
+	return nil
+}
--- a/internal/licensing/license_cache.go
+++ b/internal/licensing/license_cache.go
@@ -0,0 +1,136 @@
+package licensing
+
+import (
+	"sync"
+	"time"
+
+	"github.com/google/uuid"
+)
+
+// CacheEntry holds cached license validation data
+type CacheEntry struct {
+	Response  *ValidationResponse
+	ExpiresAt time.Time
+}
+
+// LicenseCache provides in-memory caching for license validations
+type LicenseCache struct {
+	mu      sync.RWMutex
+	entries map[string]*CacheEntry
+	ttl     time.Duration
+}
+
+// NewLicenseCache creates a new license cache with specified TTL
+func NewLicenseCache(ttl time.Duration) *LicenseCache {
+	cache := &LicenseCache{
+		entries: make(map[string]*CacheEntry),
+		ttl:     ttl,
+	}
+
+	// Start cleanup goroutine
+	go cache.cleanup()
+
+	return cache
+}
+
+// Get retrieves cached validation response if available and not expired
+func (c *LicenseCache) Get(deploymentID uuid.UUID, feature string) *ValidationResponse {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	key := c.cacheKey(deploymentID, feature)
+	entry, exists := c.entries[key]
+
+	if !exists || time.Now().After(entry.ExpiresAt) {
+		return nil
+	}
+
+	return entry.Response
+}
+
+// Set stores validation response in cache with TTL
+func (c *LicenseCache) Set(deploymentID uuid.UUID, feature string, response *ValidationResponse) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	key := c.cacheKey(deploymentID, feature)
+	c.entries[key] = &CacheEntry{
+		Response:  response,
+		ExpiresAt: time.Now().Add(c.ttl),
+	}
+}
+
+// Invalidate removes specific cache entry
+func (c *LicenseCache) Invalidate(deploymentID uuid.UUID, feature string) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	key := c.cacheKey(deploymentID, feature)
+	delete(c.entries, key)
+}
+
+// InvalidateAll removes all cached entries for a deployment
+func (c *LicenseCache) InvalidateAll(deploymentID uuid.UUID) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	prefix := deploymentID.String() + ":"
+	for key := range c.entries {
+		if len(key) > len(prefix) && key[:len(prefix)] == prefix {
+			delete(c.entries, key)
+		}
+	}
+}
+
+// Clear removes all cached entries
+func (c *LicenseCache) Clear() {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	c.entries = make(map[string]*CacheEntry)
+}
+
+// Stats returns cache statistics
+func (c *LicenseCache) Stats() map[string]interface{} {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	totalEntries := len(c.entries)
+	expiredEntries := 0
+	now := time.Now()
+
+	for _, entry := range c.entries {
+		if now.After(entry.ExpiresAt) {
+			expiredEntries++
+		}
+	}
+
+	return map[string]interface{}{
+		"total_entries":   totalEntries,
+		"expired_entries": expiredEntries,
+		"active_entries":  totalEntries - expiredEntries,
+		"ttl_seconds":     int(c.ttl.Seconds()),
+	}
+}
+
+// cacheKey generates cache key from deployment ID and feature
+func (c *LicenseCache) cacheKey(deploymentID uuid.UUID, feature string) string {
+	return deploymentID.String() + ":" + feature
+}
+
+// cleanup removes expired entries periodically
+func (c *LicenseCache) cleanup() {
+	ticker := time.NewTicker(c.ttl / 2) // Clean up twice as often as TTL
+	defer ticker.Stop()
+
+	for range ticker.C {
+		c.mu.Lock()
+		now := time.Now()
+		for key, entry := range c.entries {
+			if now.After(entry.ExpiresAt) {
+				delete(c.entries, key)
+			}
+		}
+		c.mu.Unlock()
+	}
+}