Add environment configuration and local development documentation

- Parameterize CORS_ORIGINS in docker-compose.swarm.yml
- Add .env.example with configuration options
- Create comprehensive LOCAL_DEVELOPMENT.md guide
- Update README.md with environment variable documentation
- Provide alternatives for local development without production domain

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
anthonyrawlins
2025-07-10 18:20:52 +10:00
parent daf0766e29
commit f3cbb5c6f7
50 changed files with 6339 additions and 528 deletions

View File

@@ -15,7 +15,7 @@ from typing import Dict, List, Optional, Tuple
import time
# Configuration
HIVE_API_URL = "http://localhost:8087"
HIVE_API_URL = "https://hive.home.deepblack.cloud"
SUBNET_BASE = "192.168.1"
OLLAMA_PORT = 11434
DISCOVERY_TIMEOUT = 3
@@ -167,37 +167,37 @@ class AgentDiscovery:
return discovered
def determine_agent_specialty(self, models: List[str], hostname: str) -> str:
"""Determine agent specialty based on models and hostname"""
"""Determine agent specialty based on models and hostname using valid AgentType values"""
model_str = " ".join(models).lower()
hostname_lower = hostname.lower()
# Check hostname patterns
# Check hostname patterns - map to valid Hive AgentType values
if "walnut" in hostname_lower:
return "Senior Full-Stack Development & Architecture"
return "pytorch_dev" # Full-stack development
elif "acacia" in hostname_lower:
return "Infrastructure, DevOps & System Architecture"
return "profiler" # Infrastructure/DevOps
elif "ironwood" in hostname_lower:
return "Backend Development & Code Analysis"
return "pytorch_dev" # Backend development
elif "forsteinet" in hostname_lower:
return "AI Compute & Processing"
return "kernel_dev" # AI Compute
elif "rosewood" in hostname_lower:
return "Quality Assurance, Testing & Code Review"
return "tester" # QA and Testing
elif "oak" in hostname_lower:
return "iOS/macOS Development & Apple Ecosystem"
return "docs_writer" # iOS/macOS Development
# Check model patterns
if "starcoder" in model_str:
return "Full-Stack Development & Code Generation"
if "starcoder" in model_str or "codegemma" in model_str:
return "pytorch_dev" # Code generation
elif "deepseek-coder" in model_str:
return "Backend Development & Code Analysis"
return "pytorch_dev" # Backend development
elif "deepseek-r1" in model_str:
return "Infrastructure & System Architecture"
return "profiler" # Analysis and architecture
elif "devstral" in model_str:
return "Development & Code Review"
return "tester" # Development review
elif "llava" in model_str:
return "Vision & Multimodal Analysis"
return "docs_writer" # Vision/documentation
else:
return "General AI Development"
return "pytorch_dev" # Default to pytorch development
def determine_capabilities(self, specialty: str) -> List[str]:
"""Determine capabilities based on specialty"""
@@ -240,9 +240,11 @@ class AgentDiscovery:
agent_data = {
"id": hostname.lower().replace(".", "_"),
"name": f"{hostname} Ollama Agent",
"endpoint": agent_info["endpoint"],
"model": agent_info["primary_model"],
"specialty": specialty,
"specialization": specialty, # For compatibility
"capabilities": capabilities,
"available_models": agent_info["models"],
"model_count": agent_info["model_count"],
@@ -251,6 +253,7 @@ class AgentDiscovery:
"status": "available",
"current_tasks": 0,
"max_concurrent": 3,
"agent_type": "ollama",
"discovered_at": time.time()
}

View File

@@ -0,0 +1,481 @@
#!/bin/bash
# Distributed Hive Workflow Deployment Script
# Deploys the enhanced distributed development workflow system across the cluster
set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Configuration (each value may be overridden via the environment)
PROJECT_ROOT="${PROJECT_ROOT:-/home/tony/AI/projects/hive}"
CLUSTER_NODES=("192.168.1.72" "192.168.1.27" "192.168.1.113" "192.168.1.132" "192.168.1.106")
CLUSTER_NAMES=("ACACIA" "WALNUT" "IRONWOOD" "ROSEWOOD" "FORSTEINET")
SSH_USER="${SSH_USER:-tony}"
# SECURITY: a plaintext SSH password is committed here. It is kept only as a
# backward-compatible fallback; export SSH_PASS to override, and migrate to
# SSH key-based auth so this default can be deleted from version control.
SSH_PASS="${SSH_PASS:-silverfrond[1392]}"
# Console output helpers.
# log prints a timestamped informational line; error/success/warning emit a
# colour-coded status label followed by the caller's message.
_status() {
    # _status <color> <label> <message> — shared formatter for status lines.
    local color="$1" label="$2" msg="$3"
    echo -e "${color}${label}${NC} ${msg}"
}

log() {
    echo -e "${BLUE}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $1"
}

error() {
    _status "$RED" "[ERROR]" "$1"
}

success() {
    _status "$GREEN" "[SUCCESS]" "$1"
}

warning() {
    _status "$YELLOW" "[WARNING]" "$1"
}
# Check prerequisites
# Verifies the project tree, Redis, Docker and the backend requirements file
# exist before deployment starts. Missing Redis is installed on the fly via
# apt; any other failure exits 1 (set -e also aborts on command failure).
check_prerequisites() {
    log "Checking prerequisites..."
    # Check if project directory exists
    if [ ! -d "$PROJECT_ROOT" ]; then
        error "Project directory not found: $PROJECT_ROOT"
        exit 1
    fi
    # Check if Redis is installed
    if ! command -v redis-server &> /dev/null; then
        warning "Redis server not found. Installing..."
        sudo apt update && sudo apt install -y redis-server
    fi
    # Check if Docker is available
    if ! command -v docker &> /dev/null; then
        error "Docker not found. Please install Docker first."
        exit 1
    fi
    # Check Python dependencies
    if [ ! -f "$PROJECT_ROOT/backend/requirements.txt" ]; then
        error "Requirements file not found"
        exit 1
    fi
    success "Prerequisites check completed"
}

# Install Python dependencies
# Creates backend/venv if absent, activates it, then installs the pinned
# requirements plus the extra distributed-workflow packages.
install_dependencies() {
    log "Installing Python dependencies..."
    cd "$PROJECT_ROOT/backend"
    # Create virtual environment if it doesn't exist
    if [ ! -d "venv" ]; then
        python3 -m venv venv
    fi
    # Activate virtual environment and install dependencies
    source venv/bin/activate
    pip install --upgrade pip
    pip install -r requirements.txt
    # Install additional distributed workflow dependencies
    pip install redis aioredis prometheus-client
    success "Dependencies installed"
}

# Setup Redis for distributed coordination
# Starts/enables the service, writes a drop-in config (512 MB cap with LRU
# eviction plus RDB snapshot schedule), restarts Redis and verifies PING.
setup_redis() {
    log "Setting up Redis for distributed coordination..."
    # Start Redis service
    sudo systemctl start redis-server
    sudo systemctl enable redis-server
    # Configure Redis for cluster coordination
    # NOTE(review): /etc/redis/redis.conf.d/ is only read if redis.conf has a
    # matching "include" directive — confirm on the target distribution.
    sudo tee /etc/redis/redis.conf.d/hive-distributed.conf > /dev/null <<EOF
# Hive Distributed Workflow Configuration
maxmemory 512mb
maxmemory-policy allkeys-lru
save 900 1
save 300 10
save 60 10000
EOF
    # Restart Redis with new configuration
    sudo systemctl restart redis-server
    # Test Redis connection
    if redis-cli ping | grep -q "PONG"; then
        success "Redis configured and running"
    else
        error "Redis setup failed"
        exit 1
    fi
}
# Check cluster connectivity
# SSH-probes every node in CLUSTER_NODES. Failures are warnings only; the
# deployment continues with whichever nodes are reachable.
check_cluster_connectivity() {
    log "Checking cluster connectivity..."
    for i in "${!CLUSTER_NODES[@]}"; do
        node="${CLUSTER_NODES[$i]}"
        name="${CLUSTER_NAMES[$i]}"
        log "Testing connection to $name ($node)..."
        if sshpass -p "$SSH_PASS" ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no "$SSH_USER@$node" "echo 'Connection test successful'" > /dev/null 2>&1; then
            success "$name ($node) - Connected"
        else
            warning "$name ($node) - Connection failed"
        fi
    done
}

# Deploy configuration to cluster nodes
# Packs config/distributed_config.yaml into a tarball, scp's it to each node
# and unpacks it under ~/AI/projects/hive/config on the remote host.
deploy_cluster_config() {
    log "Deploying configuration to cluster nodes..."
    # Create configuration package
    cd "$PROJECT_ROOT"
    tar -czf /tmp/hive-distributed-config.tar.gz config/distributed_config.yaml
    for i in "${!CLUSTER_NODES[@]}"; do
        node="${CLUSTER_NODES[$i]}"
        name="${CLUSTER_NAMES[$i]}"
        log "Deploying to $name ($node)..."
        # Copy configuration
        sshpass -p "$SSH_PASS" scp -o StrictHostKeyChecking=no /tmp/hive-distributed-config.tar.gz "$SSH_USER@$node:/tmp/"
        # Extract and setup configuration (remote script runs under the SSH user)
        sshpass -p "$SSH_PASS" ssh -o StrictHostKeyChecking=no "$SSH_USER@$node" "
mkdir -p /home/$SSH_USER/AI/projects/hive/config
cd /home/$SSH_USER/AI/projects/hive/config
tar -xzf /tmp/hive-distributed-config.tar.gz
chmod 644 distributed_config.yaml
"
        success "✓ Configuration deployed to $name"
    done
    # Clean up
    rm -f /tmp/hive-distributed-config.tar.gz
}

# Update Ollama configurations for distributed workflows
# Writes a systemd drop-in on each node that raises Ollama's parallelism and
# queue limits and binds it to 0.0.0.0:11434, then restarts the service
# (restart failure is tolerated via "|| true").
update_ollama_configs() {
    log "Updating Ollama configurations for distributed workflows..."
    for i in "${!CLUSTER_NODES[@]}"; do
        node="${CLUSTER_NODES[$i]}"
        name="${CLUSTER_NAMES[$i]}"
        log "Updating Ollama on $name ($node)..."
        # Update Ollama service configuration for better distributed performance.
        # The heredoc terminator must stay at column 0 inside the remote script.
        sshpass -p "$SSH_PASS" ssh -o StrictHostKeyChecking=no "$SSH_USER@$node" "
# Create Ollama service override directory if it doesn't exist
sudo mkdir -p /etc/systemd/system/ollama.service.d/
# Create distributed workflow optimizations
sudo tee /etc/systemd/system/ollama.service.d/distributed.conf > /dev/null <<'OVERRIDE_EOF'
[Service]
Environment=\"OLLAMA_NUM_PARALLEL=4\"
Environment=\"OLLAMA_MAX_QUEUE=10\"
Environment=\"OLLAMA_KEEP_ALIVE=10m\"
Environment=\"OLLAMA_HOST=0.0.0.0:11434\"
OVERRIDE_EOF
# Reload systemd and restart Ollama
sudo systemctl daemon-reload
sudo systemctl restart ollama || true
"
        success "✓ Ollama updated on $name"
    done
}
# Start the distributed coordinator
# Activates the venv, initialises the database, then launches uvicorn in the
# background with nohup. The PID is written to /tmp/hive-distributed.pid so
# the "stop" subcommand can kill it later. Exits 1 if startup fails.
start_distributed_system() {
    log "Starting distributed workflow system..."
    cd "$PROJECT_ROOT/backend"
    source venv/bin/activate
    # Start the main Hive application with distributed workflows
    export PYTHONPATH="$PROJECT_ROOT/backend:$PYTHONPATH"
    export HIVE_CONFIG_PATH="$PROJECT_ROOT/config/distributed_config.yaml"
    # Run database migrations
    log "Running database migrations..."
    python -c "
from app.core.database import init_database_with_retry
init_database_with_retry()
print('Database initialized')
"
    # Start the application in the background
    # NOTE(review): --reload is a development flag; drop it for production runs.
    log "Starting Hive with distributed workflows..."
    nohup python -m uvicorn app.main:app \
        --host 0.0.0.0 \
        --port 8000 \
        --reload \
        --log-level info > /tmp/hive-distributed.log 2>&1 &
    HIVE_PID=$!
    echo $HIVE_PID > /tmp/hive-distributed.pid
    # Wait for startup
    sleep 10
    # Check if the service is running (kill -0 probes without signalling)
    if kill -0 $HIVE_PID 2>/dev/null; then
        success "Distributed workflow system started (PID: $HIVE_PID)"
        log "Application logs: tail -f /tmp/hive-distributed.log"
        log "Health check: curl http://localhost:8000/health"
        log "Distributed API: curl http://localhost:8000/api/distributed/cluster/status"
    else
        error "Failed to start distributed workflow system"
        exit 1
    fi
}

# Run health checks
# Probes the main API, the distributed API and Redis, then queries the
# cluster-status endpoint and counts healthy agents. Purely informational:
# it reports failures but never exits.
run_health_checks() {
    log "Running health checks..."
    # Wait for services to fully start
    sleep 15
    # Check main API
    if curl -s http://localhost:8000/health > /dev/null; then
        success "✓ Main API responding"
    else
        error "✗ Main API not responding"
    fi
    # Check distributed API
    if curl -s http://localhost:8000/api/distributed/cluster/status > /dev/null; then
        success "✓ Distributed API responding"
    else
        error "✗ Distributed API not responding"
    fi
    # Check Redis connection
    if redis-cli ping | grep -q "PONG"; then
        success "✓ Redis connection working"
    else
        error "✗ Redis connection failed"
    fi
    # Check cluster agent connectivity; healthy_agents defaults to 0 on any
    # parse error so the numeric comparison below is always safe.
    response=$(curl -s http://localhost:8000/api/distributed/cluster/status || echo "{}")
    healthy_agents=$(echo "$response" | python3 -c "
import sys, json
try:
    data = json.load(sys.stdin)
    print(data.get('healthy_agents', 0))
except:
    print(0)
" || echo "0")
    if [ "$healthy_agents" -gt 0 ]; then
        success "$healthy_agents cluster agents healthy"
    else
        warning "✗ No healthy cluster agents found"
    fi
}
# Create systemd service for production deployment
# Installs and enables hive-distributed.service (it does not start it —
# note the unit runs uvicorn WITHOUT --reload, unlike start_distributed_system).
create_systemd_service() {
    log "Creating systemd service for production deployment..."
    sudo tee /etc/systemd/system/hive-distributed.service > /dev/null <<EOF
[Unit]
Description=Hive Distributed Workflow System
After=network.target redis.service
Wants=redis.service
[Service]
Type=exec
User=$USER
Group=$USER
WorkingDirectory=$PROJECT_ROOT/backend
Environment=PYTHONPATH=$PROJECT_ROOT/backend
Environment=HIVE_CONFIG_PATH=$PROJECT_ROOT/config/distributed_config.yaml
ExecStart=$PROJECT_ROOT/backend/venv/bin/python -m uvicorn app.main:app --host 0.0.0.0 --port 8000
ExecReload=/bin/kill -HUP \$MAINPID
Restart=always
RestartSec=5
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target
EOF
    # Enable the service
    sudo systemctl daemon-reload
    sudo systemctl enable hive-distributed.service
    success "Systemd service created and enabled"
    log "Use 'sudo systemctl start hive-distributed' to start the service"
    log "Use 'sudo systemctl status hive-distributed' to check status"
}

# Generate deployment report
# Writes a human-readable summary (paths, endpoints, management commands,
# troubleshooting tips) to /tmp and echoes it to stdout.
generate_report() {
    log "Generating deployment report..."
    report_file="/tmp/hive-distributed-deployment-report.txt"
    cat > "$report_file" <<EOF
# Hive Distributed Workflow System - Deployment Report
Generated: $(date)
## Deployment Summary
- Project Directory: $PROJECT_ROOT
- Configuration: $PROJECT_ROOT/config/distributed_config.yaml
- Log File: /tmp/hive-distributed.log
- PID File: /tmp/hive-distributed.pid
## Cluster Configuration
EOF
    # Append one line per configured cluster node.
    for i in "${!CLUSTER_NODES[@]}"; do
        node="${CLUSTER_NODES[$i]}"
        name="${CLUSTER_NAMES[$i]}"
        echo "- $name: $node" >> "$report_file"
    done
    cat >> "$report_file" <<EOF
## Service Endpoints
- Main API: http://localhost:8000
- Health Check: http://localhost:8000/health
- API Documentation: http://localhost:8000/docs
- Distributed Workflows: http://localhost:8000/api/distributed/workflows
- Cluster Status: http://localhost:8000/api/distributed/cluster/status
- Performance Metrics: http://localhost:8000/api/distributed/performance/metrics
## Management Commands
- Start Service: sudo systemctl start hive-distributed
- Stop Service: sudo systemctl stop hive-distributed
- Restart Service: sudo systemctl restart hive-distributed
- View Logs: sudo journalctl -u hive-distributed -f
- View Application Logs: tail -f /tmp/hive-distributed.log
## Cluster Operations
- Check Cluster Status: curl http://localhost:8000/api/distributed/cluster/status
- Submit Workflow: POST to /api/distributed/workflows
- List Workflows: GET /api/distributed/workflows
- Optimize Cluster: POST to /api/distributed/cluster/optimize
## Troubleshooting
- Redis Status: sudo systemctl status redis-server
- Redis Connection: redis-cli ping
- Agent Connectivity: Check Ollama services on cluster nodes
- Application Health: curl http://localhost:8000/health
## Next Steps
1. Test distributed workflow submission
2. Monitor cluster performance metrics
3. Configure production security settings
4. Set up automated backups
5. Implement monitoring and alerting
EOF
    success "Deployment report generated: $report_file"
    cat "$report_file"
}
# Main deployment function
# Runs the full deployment pipeline in order; because of `set -e` any step
# that exits non-zero aborts the whole run.
main() {
    echo -e "${GREEN}"
    echo "╔══════════════════════════════════════════════════════════════╗"
    echo "║ Hive Distributed Workflow Deployment ║"
    echo "║ ║"
    echo "║ Deploying cluster-wide development workflow orchestration ║"
    echo "╚══════════════════════════════════════════════════════════════╝"
    echo -e "${NC}"
    log "Starting deployment of Hive Distributed Workflow System..."
    # Run deployment steps
    check_prerequisites
    install_dependencies
    setup_redis
    check_cluster_connectivity
    deploy_cluster_config
    update_ollama_configs
    start_distributed_system
    run_health_checks
    create_systemd_service
    generate_report
    echo -e "${GREEN}"
    echo "╔══════════════════════════════════════════════════════════════╗"
    echo "║ Deployment Completed! ║"
    echo "║ ║"
    echo "║ 🚀 Hive Distributed Workflow System is now running ║"
    echo "║ 📊 Visit http://localhost:8000/docs for API documentation ║"
    echo "║ 🌐 Cluster status: http://localhost:8000/api/distributed/ ║"
    echo "║ cluster/status ║"
    echo "╚══════════════════════════════════════════════════════════════╝"
    echo -e "${NC}"
}

# Handle script arguments.
# Subcommand dispatcher; defaults to "deploy" when no argument is given.
# "stop" also kills the nohup-launched process recorded in the PID file.
case "${1:-deploy}" in
    "deploy")
        main
        ;;
    "start")
        log "Starting Hive Distributed Workflow System..."
        sudo systemctl start hive-distributed
        ;;
    "stop")
        log "Stopping Hive Distributed Workflow System..."
        sudo systemctl stop hive-distributed
        if [ -f /tmp/hive-distributed.pid ]; then
            kill $(cat /tmp/hive-distributed.pid) 2>/dev/null || true
            rm -f /tmp/hive-distributed.pid
        fi
        ;;
    "status")
        log "Checking system status..."
        sudo systemctl status hive-distributed
        ;;
    "logs")
        log "Showing application logs..."
        tail -f /tmp/hive-distributed.log
        ;;
    "health")
        log "Running health checks..."
        run_health_checks
        ;;
    "cluster")
        log "Checking cluster status..."
        curl -s http://localhost:8000/api/distributed/cluster/status | python3 -m json.tool
        ;;
    *)
        echo "Usage: $0 {deploy|start|stop|status|logs|health|cluster}"
        echo ""
        echo "Commands:"
        echo "  deploy  - Full deployment of distributed workflow system"
        echo "  start   - Start the service"
        echo "  stop    - Stop the service"
        echo "  status  - Show service status"
        echo "  logs    - Show application logs"
        echo "  health  - Run health checks"
        echo "  cluster - Show cluster status"
        exit 1
        ;;
esac

View File

@@ -0,0 +1,669 @@
#!/usr/bin/env python3
"""
Comprehensive Testing Suite for Hive Distributed Workflows
Tests all aspects of the distributed development workflow system
"""
import asyncio
import aiohttp
import json
import time
import sys
import logging
from datetime import datetime
from typing import Dict, List, Any, Optional
from dataclasses import dataclass
import argparse
import traceback

# Configure module-wide logging: timestamped INFO-level messages.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
@dataclass
class TestResult:
    """Test result data class"""
    name: str        # human-readable test name
    success: bool    # True when the test passed
    duration: float  # wall-clock runtime in seconds
    message: str     # pass/fail summary or error text
    data: Optional[Dict[str, Any]] = None  # optional payload returned by the test
class DistributedWorkflowTester:
    """Comprehensive tester for distributed workflow system"""

    def __init__(self, base_url: str = "http://localhost:8000"):
        self.base_url = base_url
        # The HTTP session is created in __aenter__, so the tester must be
        # used as an async context manager.
        self.session: Optional[aiohttp.ClientSession] = None
        self.test_results: List[TestResult] = []  # accumulated results, in run order
        self.workflow_ids: List[str] = []         # workflow IDs created during testing

    async def __aenter__(self):
        """Async context manager entry"""
        self.session = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=300)  # 5 minute timeout
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit"""
        if self.session:
            await self.session.close()
    async def run_test(self, test_name: str, test_func, *args, **kwargs) -> TestResult:
        """Run a single test with error handling and timing.

        Normalises the test function's return value (bool, dict, or anything
        else) into a TestResult, appends it to self.test_results, and logs
        the outcome. Exceptions are caught and recorded as failures rather
        than propagated.
        """
        logger.info(f"🧪 Running test: {test_name}")
        start_time = time.time()
        try:
            result = await test_func(*args, **kwargs)
            duration = time.time() - start_time
            # Interpret the three supported return shapes.
            if isinstance(result, bool):
                success = result
                message = "Test passed" if success else "Test failed"
                data = None
            elif isinstance(result, dict):
                success = result.get('success', True)
                message = result.get('message', 'Test completed')
                data = result.get('data')
            else:
                success = True
                message = str(result)
                data = None
            test_result = TestResult(
                name=test_name,
                success=success,
                duration=duration,
                message=message,
                data=data
            )
            self.test_results.append(test_result)
            if success:
                logger.info(f"{test_name} - {message} ({duration:.2f}s)")
            else:
                logger.error(f"{test_name} - {message} ({duration:.2f}s)")
            return test_result
        except Exception as e:
            duration = time.time() - start_time
            error_message = f"Exception: {str(e)}"
            logger.error(f"💥 {test_name} - {error_message} ({duration:.2f}s)")
            logger.debug(traceback.format_exc())
            test_result = TestResult(
                name=test_name,
                success=False,
                duration=duration,
                message=error_message
            )
            self.test_results.append(test_result)
            return test_result
    async def test_system_health(self) -> Dict[str, Any]:
        """Test basic system health.

        GETs /health; fails on a non-200 status or when any reported
        component is neither 'operational' nor 'healthy'.
        """
        async with self.session.get(f"{self.base_url}/health") as response:
            if response.status != 200:
                return {
                    'success': False,
                    'message': f"Health check failed with status {response.status}"
                }
            health_data = await response.json()
            # Check component health
            components = health_data.get('components', {})
            unhealthy_components = [
                name for name, status in components.items()
                if status not in ['operational', 'healthy']
            ]
            if unhealthy_components:
                return {
                    'success': False,
                    'message': f"Unhealthy components: {unhealthy_components}",
                    'data': health_data
                }
            return {
                'success': True,
                'message': "All system components healthy",
                'data': health_data
            }

    async def test_cluster_status(self) -> Dict[str, Any]:
        """Test cluster status endpoint.

        Fails when the endpoint is non-200, when the cluster reports zero
        agents, or when none of the agents are healthy.
        """
        async with self.session.get(f"{self.base_url}/api/distributed/cluster/status") as response:
            if response.status != 200:
                return {
                    'success': False,
                    'message': f"Cluster status failed with status {response.status}"
                }
            cluster_data = await response.json()
            total_agents = cluster_data.get('total_agents', 0)
            healthy_agents = cluster_data.get('healthy_agents', 0)
            if total_agents == 0:
                return {
                    'success': False,
                    'message': "No agents found in cluster",
                    'data': cluster_data
                }
            if healthy_agents == 0:
                return {
                    'success': False,
                    'message': "No healthy agents in cluster",
                    'data': cluster_data
                }
            return {
                'success': True,
                'message': f"{healthy_agents}/{total_agents} agents healthy",
                'data': cluster_data
            }
    async def test_workflow_submission(self) -> Dict[str, Any]:
        """Test workflow submission.

        Submits one fixed workflow and records the returned workflow_id in
        self.workflow_ids so later tests (status tracking) can reuse it.
        """
        workflow_data = {
            "name": "Test REST API Development",
            "requirements": "Create a simple REST API with user authentication, CRUD operations for a todo list, and comprehensive error handling.",
            "context": "This is a test workflow to validate the distributed system functionality.",
            "language": "python",
            "priority": "high"
        }
        async with self.session.post(
            f"{self.base_url}/api/distributed/workflows",
            json=workflow_data
        ) as response:
            if response.status != 200:
                return {
                    'success': False,
                    'message': f"Workflow submission failed with status {response.status}"
                }
            result = await response.json()
            workflow_id = result.get('workflow_id')
            if not workflow_id:
                return {
                    'success': False,
                    'message': "No workflow_id returned",
                    'data': result
                }
            self.workflow_ids.append(workflow_id)
            return {
                'success': True,
                'message': f"Workflow submitted successfully: {workflow_id}",
                'data': result
            }

    async def test_workflow_status_tracking(self) -> Dict[str, Any]:
        """Test workflow status tracking.

        Polls the first submitted workflow every 5s for up to 2 minutes,
        recording each status snapshot. Succeeds as long as polling itself
        works — even if the workflow has not finished inside the window.
        """
        if not self.workflow_ids:
            return {
                'success': False,
                'message': "No workflows available for status tracking"
            }
        workflow_id = self.workflow_ids[0]
        # Poll workflow status for up to 2 minutes
        max_wait_time = 120  # 2 minutes
        poll_interval = 5  # 5 seconds
        start_time = time.time()
        status_changes = []
        while time.time() - start_time < max_wait_time:
            async with self.session.get(
                f"{self.base_url}/api/distributed/workflows/{workflow_id}"
            ) as response:
                if response.status != 200:
                    return {
                        'success': False,
                        'message': f"Status check failed with status {response.status}"
                    }
                status_data = await response.json()
                current_status = status_data.get('status', 'unknown')
                progress = status_data.get('progress', 0)
                status_changes.append({
                    'timestamp': datetime.now().isoformat(),
                    'status': current_status,
                    'progress': progress,
                    'completed_tasks': status_data.get('completed_tasks', 0),
                    'total_tasks': status_data.get('total_tasks', 0)
                })
                logger.info(f"Workflow {workflow_id}: {current_status} ({progress:.1f}%)")
                # Stop polling once a terminal state is reached.
                if current_status in ['completed', 'failed']:
                    break
            await asyncio.sleep(poll_interval)
        final_status = status_changes[-1] if status_changes else {}
        return {
            'success': True,
            'message': f"Status tracking completed. Final status: {final_status.get('status', 'unknown')}",
            'data': {
                'workflow_id': workflow_id,
                'status_changes': status_changes,
                'final_status': final_status
            }
        }
    async def test_multiple_workflow_submission(self) -> Dict[str, Any]:
        """Test concurrent workflow submission.

        Fires three POSTs in parallel via asyncio.gather; succeeds only when
        every submission returns 200 with a workflow_id. All created IDs are
        appended to self.workflow_ids.
        """
        workflows = [
            {
                "name": "Frontend React App",
                "requirements": "Create a React application with TypeScript, routing, and state management.",
                "language": "typescript",
                "priority": "normal"
            },
            {
                "name": "Python Data Analysis",
                "requirements": "Create a data analysis script with pandas, visualization, and reporting.",
                "language": "python",
                "priority": "normal"
            },
            {
                "name": "Microservice Architecture",
                "requirements": "Design a microservices system with API gateway and service discovery.",
                "language": "go",
                "priority": "high"
            }
        ]
        submission_tasks = []
        for workflow in workflows:
            task = self.session.post(
                f"{self.base_url}/api/distributed/workflows",
                json=workflow
            )
            submission_tasks.append(task)
        try:
            responses = await asyncio.gather(*submission_tasks)
            submitted_workflows = []
            for i, response in enumerate(responses):
                if response.status == 200:
                    result = await response.json()
                    workflow_id = result.get('workflow_id')
                    if workflow_id:
                        self.workflow_ids.append(workflow_id)
                        submitted_workflows.append({
                            'name': workflows[i]['name'],
                            'workflow_id': workflow_id
                        })
                # Responses are not used as context managers above, so each
                # one must be closed explicitly to release the connection.
                response.close()
            return {
                'success': len(submitted_workflows) == len(workflows),
                'message': f"Submitted {len(submitted_workflows)}/{len(workflows)} workflows concurrently",
                'data': {'submitted_workflows': submitted_workflows}
            }
        except Exception as e:
            return {
                'success': False,
                'message': f"Concurrent submission failed: {str(e)}"
            }
    async def test_workflow_cancellation(self) -> Dict[str, Any]:
        """Test workflow cancellation.

        Submits a fresh low-priority workflow, waits 2s so it can start, then
        POSTs to its /cancel endpoint. The pre-check on self.workflow_ids
        only verifies earlier submissions worked; the cancelled workflow is
        the one created here.
        """
        if not self.workflow_ids:
            return {
                'success': False,
                'message': "No workflows available for cancellation test"
            }
        # Submit a new workflow specifically for cancellation
        workflow_data = {
            "name": "Cancellation Test Workflow",
            "requirements": "This workflow will be cancelled during execution to test cancellation functionality.",
            "language": "python",
            "priority": "low"
        }
        async with self.session.post(
            f"{self.base_url}/api/distributed/workflows",
            json=workflow_data
        ) as response:
            if response.status != 200:
                return {
                    'success': False,
                    'message': "Failed to submit workflow for cancellation test"
                }
            result = await response.json()
            workflow_id = result.get('workflow_id')
            if not workflow_id:
                return {
                    'success': False,
                    'message': "No workflow_id returned for cancellation test"
                }
        # Wait a bit to let the workflow start
        await asyncio.sleep(2)
        # Cancel the workflow
        async with self.session.post(
            f"{self.base_url}/api/distributed/workflows/{workflow_id}/cancel"
        ) as response:
            if response.status != 200:
                return {
                    'success': False,
                    'message': f"Cancellation failed with status {response.status}"
                }
            cancel_result = await response.json()
            return {
                'success': True,
                'message': f"Workflow cancelled successfully: {workflow_id}",
                'data': cancel_result
            }
    async def test_performance_metrics(self) -> Dict[str, Any]:
        """Test performance metrics endpoint.

        Fails when the endpoint is non-200 or when any of the required
        top-level fields is missing from the payload.
        """
        async with self.session.get(f"{self.base_url}/api/distributed/performance/metrics") as response:
            if response.status != 200:
                return {
                    'success': False,
                    'message': f"Performance metrics failed with status {response.status}"
                }
            metrics_data = await response.json()
            required_fields = ['total_workflows', 'completed_workflows', 'agent_performance']
            missing_fields = [field for field in required_fields if field not in metrics_data]
            if missing_fields:
                return {
                    'success': False,
                    'message': f"Missing required metrics fields: {missing_fields}",
                    'data': metrics_data
                }
            return {
                'success': True,
                'message': "Performance metrics retrieved successfully",
                'data': metrics_data
            }

    async def test_cluster_optimization(self) -> Dict[str, Any]:
        """Test cluster optimization trigger.

        Only checks the POST is accepted (200); the optimization result
        itself is not validated.
        """
        async with self.session.post(f"{self.base_url}/api/distributed/cluster/optimize") as response:
            if response.status != 200:
                return {
                    'success': False,
                    'message': f"Cluster optimization failed with status {response.status}"
                }
            result = await response.json()
            return {
                'success': True,
                'message': "Cluster optimization triggered successfully",
                'data': result
            }

    async def test_workflow_listing(self) -> Dict[str, Any]:
        """Test workflow listing functionality.

        Verifies the listing endpoint returns 200 with a JSON list.
        """
        async with self.session.get(f"{self.base_url}/api/distributed/workflows") as response:
            if response.status != 200:
                return {
                    'success': False,
                    'message': f"Workflow listing failed with status {response.status}"
                }
            workflows = await response.json()
            if not isinstance(workflows, list):
                return {
                    'success': False,
                    'message': "Workflow listing should return a list"
                }
            return {
                'success': True,
                'message': f"Retrieved {len(workflows)} workflows",
                'data': {'workflow_count': len(workflows), 'workflows': workflows[:5]}  # First 5 for brevity
            }
    async def test_agent_health_monitoring(self) -> Dict[str, Any]:
        """Test individual agent health monitoring.

        Fetches the agent list from the cluster-status endpoint, then probes
        the per-agent tasks endpoint for up to three agents. Succeeds when at
        least one agent responds with 200.
        """
        # First get cluster status to get agent list
        async with self.session.get(f"{self.base_url}/api/distributed/cluster/status") as response:
            if response.status != 200:
                return {
                    'success': False,
                    'message': "Failed to get cluster status for agent testing"
                }
            cluster_data = await response.json()
            agents = cluster_data.get('agents', [])
        if not agents:
            return {
                'success': False,
                'message': "No agents found for health monitoring test"
            }
        # Test individual agent health
        agent_results = []
        for agent in agents[:3]:  # Test first 3 agents
            agent_id = agent.get('id')
            if agent_id:
                async with self.session.get(
                    f"{self.base_url}/api/distributed/agents/{agent_id}/tasks"
                ) as response:
                    agent_results.append({
                        'agent_id': agent_id,
                        'status_code': response.status,
                        'health_status': agent.get('health_status', 'unknown')
                    })
        successful_checks = sum(1 for result in agent_results if result['status_code'] == 200)
        return {
            'success': successful_checks > 0,
            'message': f"Agent health monitoring: {successful_checks}/{len(agent_results)} agents responding",
            'data': {'agent_results': agent_results}
        }
    async def run_comprehensive_test_suite(self) -> Dict[str, Any]:
        """Run the complete test suite.

        Executes every test in a fixed order (submission before tracking and
        cancellation, since those reuse workflow IDs), then logs and returns
        a summary dict with counts, success rate and total duration.
        """
        logger.info("🚀 Starting Comprehensive Distributed Workflow Test Suite")
        logger.info("=" * 60)
        # Define test sequence
        tests = [
            ("System Health Check", self.test_system_health),
            ("Cluster Status", self.test_cluster_status),
            ("Single Workflow Submission", self.test_workflow_submission),
            ("Multiple Workflow Submission", self.test_multiple_workflow_submission),
            ("Workflow Status Tracking", self.test_workflow_status_tracking),
            ("Workflow Cancellation", self.test_workflow_cancellation),
            ("Performance Metrics", self.test_performance_metrics),
            ("Cluster Optimization", self.test_cluster_optimization),
            ("Workflow Listing", self.test_workflow_listing),
            ("Agent Health Monitoring", self.test_agent_health_monitoring),
        ]
        # Run all tests
        for test_name, test_func in tests:
            await self.run_test(test_name, test_func)
            await asyncio.sleep(1)  # Brief pause between tests
        # Generate summary
        total_tests = len(self.test_results)
        passed_tests = sum(1 for result in self.test_results if result.success)
        failed_tests = total_tests - passed_tests
        total_duration = sum(result.duration for result in self.test_results)
        summary = {
            'total_tests': total_tests,
            'passed_tests': passed_tests,
            'failed_tests': failed_tests,
            'success_rate': (passed_tests / total_tests) * 100 if total_tests > 0 else 0,
            'total_duration': total_duration,
            'workflow_ids_created': self.workflow_ids
        }
        logger.info("=" * 60)
        logger.info("📊 Test Suite Summary:")
        logger.info(f"   Total Tests: {total_tests}")
        logger.info(f"   Passed: {passed_tests}")
        logger.info(f"   Failed: {failed_tests}")
        logger.info(f"   Success Rate: {summary['success_rate']:.1f}%")
        logger.info(f"   Total Duration: {total_duration:.2f}s")
        logger.info(f"   Workflows Created: {len(self.workflow_ids)}")
        if failed_tests > 0:
            logger.error("❌ Failed Tests:")
            for result in self.test_results:
                if not result.success:
                    logger.error(f"   - {result.name}: {result.message}")
        return summary
def generate_detailed_report(self) -> str:
"""Generate a detailed test report"""
report = []
report.append("# Hive Distributed Workflow System - Test Report")
report.append(f"Generated: {datetime.now().isoformat()}")
report.append("")
# Summary
total_tests = len(self.test_results)
passed_tests = sum(1 for result in self.test_results if result.success)
failed_tests = total_tests - passed_tests
total_duration = sum(result.duration for result in self.test_results)
report.append("## Test Summary")
report.append(f"- **Total Tests**: {total_tests}")
report.append(f"- **Passed**: {passed_tests}")
report.append(f"- **Failed**: {failed_tests}")
report.append(f"- **Success Rate**: {(passed_tests/total_tests)*100:.1f}%")
report.append(f"- **Total Duration**: {total_duration:.2f} seconds")
report.append(f"- **Workflows Created**: {len(self.workflow_ids)}")
report.append("")
# Detailed results
report.append("## Detailed Test Results")
for result in self.test_results:
status = "✅ PASS" if result.success else "❌ FAIL"
report.append(f"### {result.name} - {status}")
report.append(f"- **Duration**: {result.duration:.2f}s")
report.append(f"- **Message**: {result.message}")
if result.data:
report.append(f"- **Data**: ```json\n{json.dumps(result.data, indent=2)}\n```")
report.append("")
# Recommendations
report.append("## Recommendations")
if failed_tests == 0:
report.append("🎉 All tests passed! The distributed workflow system is functioning correctly.")
else:
report.append("⚠️ Some tests failed. Please review the failed tests and address any issues.")
report.append("")
report.append("### Failed Tests:")
for result in self.test_results:
if not result.success:
report.append(f"- **{result.name}**: {result.message}")
return "\n".join(report)
async def main():
    """Main test execution function.

    Parses CLI arguments, runs either a single named test or the full suite,
    optionally writes a detailed report, and returns a process exit code
    (0 on success, 1 on any failure or interruption).
    """
    parser = argparse.ArgumentParser(description="Test Hive Distributed Workflow System")
    parser.add_argument(
        "--url",
        default="http://localhost:8000",
        help="Base URL for the Hive API (default: http://localhost:8000)"
    )
    parser.add_argument(
        "--output",
        help="Output file for detailed test report"
    )
    parser.add_argument(
        "--single-test",
        help="Run a single test by name"
    )
    args = parser.parse_args()
    try:
        async with DistributedWorkflowTester(args.url) as tester:
            if args.single_test:
                # Run single test: short alias -> bound test coroutine
                test_methods = {
                    'health': tester.test_system_health,
                    'cluster': tester.test_cluster_status,
                    'submit': tester.test_workflow_submission,
                    'multiple': tester.test_multiple_workflow_submission,
                    'status': tester.test_workflow_status_tracking,
                    'cancel': tester.test_workflow_cancellation,
                    'metrics': tester.test_performance_metrics,
                    'optimize': tester.test_cluster_optimization,
                    'list': tester.test_workflow_listing,
                    'agents': tester.test_agent_health_monitoring,
                }
                if args.single_test in test_methods:
                    await tester.run_test(args.single_test, test_methods[args.single_test])
                else:
                    logger.error(f"Unknown test: {args.single_test}")
                    logger.info(f"Available tests: {', '.join(test_methods.keys())}")
                    return 1
            else:
                # Run full test suite
                summary = await tester.run_comprehensive_test_suite()
            # Generate and save report if requested
            if args.output:
                report = tester.generate_detailed_report()
                with open(args.output, 'w') as f:
                    f.write(report)
                logger.info(f"📄 Detailed report saved to: {args.output}")
            # Return appropriate exit code; `summary` is only bound on the
            # full-suite path, which this branch structure guarantees.
            if args.single_test:
                return 0 if tester.test_results[-1].success else 1
            else:
                return 0 if summary['failed_tests'] == 0 else 1
    except KeyboardInterrupt:
        logger.info("❌ Test execution interrupted by user")
        return 1
    except Exception as e:
        logger.error(f"💥 Test execution failed: {str(e)}")
        logger.debug(traceback.format_exc())
        return 1
if __name__ == "__main__":
    # Run the async entry point and propagate its exit status to the shell.
    sys.exit(asyncio.run(main()))