Add environment configuration and local development documentation

- Parameterize CORS_ORIGINS in docker-compose.swarm.yml
- Add .env.example with configuration options
- Create comprehensive LOCAL_DEVELOPMENT.md guide
- Update README.md with environment variable documentation
- Provide alternatives for local development without production domain

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
anthonyrawlins
2025-07-10 18:20:52 +10:00
parent daf0766e29
commit f3cbb5c6f7
50 changed files with 6339 additions and 528 deletions

View File

@@ -15,7 +15,7 @@ from typing import Dict, List, Optional, Tuple
import time
# Configuration
HIVE_API_URL = "http://localhost:8087"
HIVE_API_URL = "https://hive.home.deepblack.cloud"
SUBNET_BASE = "192.168.1"
OLLAMA_PORT = 11434
DISCOVERY_TIMEOUT = 3
@@ -167,37 +167,37 @@ class AgentDiscovery:
return discovered
def determine_agent_specialty(self, models: List[str], hostname: str) -> str:
"""Determine agent specialty based on models and hostname"""
"""Determine agent specialty based on models and hostname using valid AgentType values"""
model_str = " ".join(models).lower()
hostname_lower = hostname.lower()
# Check hostname patterns
# Check hostname patterns - map to valid Hive AgentType values
if "walnut" in hostname_lower:
return "Senior Full-Stack Development & Architecture"
return "pytorch_dev" # Full-stack development
elif "acacia" in hostname_lower:
return "Infrastructure, DevOps & System Architecture"
return "profiler" # Infrastructure/DevOps
elif "ironwood" in hostname_lower:
return "Backend Development & Code Analysis"
return "pytorch_dev" # Backend development
elif "forsteinet" in hostname_lower:
return "AI Compute & Processing"
return "kernel_dev" # AI Compute
elif "rosewood" in hostname_lower:
return "Quality Assurance, Testing & Code Review"
return "tester" # QA and Testing
elif "oak" in hostname_lower:
return "iOS/macOS Development & Apple Ecosystem"
return "docs_writer" # iOS/macOS Development
# Check model patterns
if "starcoder" in model_str:
return "Full-Stack Development & Code Generation"
if "starcoder" in model_str or "codegemma" in model_str:
return "pytorch_dev" # Code generation
elif "deepseek-coder" in model_str:
return "Backend Development & Code Analysis"
return "pytorch_dev" # Backend development
elif "deepseek-r1" in model_str:
return "Infrastructure & System Architecture"
return "profiler" # Analysis and architecture
elif "devstral" in model_str:
return "Development & Code Review"
return "tester" # Development review
elif "llava" in model_str:
return "Vision & Multimodal Analysis"
return "docs_writer" # Vision/documentation
else:
return "General AI Development"
return "pytorch_dev" # Default to pytorch development
def determine_capabilities(self, specialty: str) -> List[str]:
"""Determine capabilities based on specialty"""
@@ -240,9 +240,11 @@ class AgentDiscovery:
agent_data = {
"id": hostname.lower().replace(".", "_"),
"name": f"{hostname} Ollama Agent",
"endpoint": agent_info["endpoint"],
"model": agent_info["primary_model"],
"specialty": specialty,
"specialization": specialty, # For compatibility
"capabilities": capabilities,
"available_models": agent_info["models"],
"model_count": agent_info["model_count"],
@@ -251,6 +253,7 @@ class AgentDiscovery:
"status": "available",
"current_tasks": 0,
"max_concurrent": 3,
"agent_type": "ollama",
"discovered_at": time.time()
}

View File

@@ -0,0 +1,481 @@
#!/bin/bash
# Distributed Hive Workflow Deployment Script
# Deploys the enhanced distributed development workflow system across the cluster
set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Configuration (each value may be overridden via the environment)
PROJECT_ROOT="${PROJECT_ROOT:-/home/tony/AI/projects/hive}"
CLUSTER_NODES=("192.168.1.72" "192.168.1.27" "192.168.1.113" "192.168.1.132" "192.168.1.106")
CLUSTER_NAMES=("ACACIA" "WALNUT" "IRONWOOD" "ROSEWOOD" "FORSTEINET")
SSH_USER="${SSH_USER:-tony}"
# SECURITY: a plaintext SSH password is committed here. It is kept only as a
# backward-compatible fallback; export SSH_PASS to override, and migrate to
# SSH key-based auth so this default can be deleted from version control.
SSH_PASS="${SSH_PASS:-silverfrond[1392]}"
# Console output helpers.
# log prints a timestamped informational line; error/success/warning emit a
# colour-coded status label followed by the caller's message.
_status() {
    # _status <color> <label> <message> — shared formatter for status lines.
    local color="$1" label="$2" msg="$3"
    echo -e "${color}${label}${NC} ${msg}"
}

log() {
    echo -e "${BLUE}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $1"
}

error() {
    _status "$RED" "[ERROR]" "$1"
}

success() {
    _status "$GREEN" "[SUCCESS]" "$1"
}

warning() {
    _status "$YELLOW" "[WARNING]" "$1"
}
# Check prerequisites
# Verifies the project tree, Redis, Docker and the backend requirements file
# exist before deployment starts. Missing Redis is installed on the fly via
# apt; any other failure exits 1 (set -e also aborts on command failure).
check_prerequisites() {
    log "Checking prerequisites..."
    # Check if project directory exists
    if [ ! -d "$PROJECT_ROOT" ]; then
        error "Project directory not found: $PROJECT_ROOT"
        exit 1
    fi
    # Check if Redis is installed
    if ! command -v redis-server &> /dev/null; then
        warning "Redis server not found. Installing..."
        sudo apt update && sudo apt install -y redis-server
    fi
    # Check if Docker is available
    if ! command -v docker &> /dev/null; then
        error "Docker not found. Please install Docker first."
        exit 1
    fi
    # Check Python dependencies
    if [ ! -f "$PROJECT_ROOT/backend/requirements.txt" ]; then
        error "Requirements file not found"
        exit 1
    fi
    success "Prerequisites check completed"
}

# Install Python dependencies
# Creates backend/venv if absent, activates it, then installs the pinned
# requirements plus the extra distributed-workflow packages.
install_dependencies() {
    log "Installing Python dependencies..."
    cd "$PROJECT_ROOT/backend"
    # Create virtual environment if it doesn't exist
    if [ ! -d "venv" ]; then
        python3 -m venv venv
    fi
    # Activate virtual environment and install dependencies
    source venv/bin/activate
    pip install --upgrade pip
    pip install -r requirements.txt
    # Install additional distributed workflow dependencies
    pip install redis aioredis prometheus-client
    success "Dependencies installed"
}

# Setup Redis for distributed coordination
# Starts/enables the service, writes a drop-in config (512 MB cap with LRU
# eviction plus RDB snapshot schedule), restarts Redis and verifies PING.
setup_redis() {
    log "Setting up Redis for distributed coordination..."
    # Start Redis service
    sudo systemctl start redis-server
    sudo systemctl enable redis-server
    # Configure Redis for cluster coordination
    # NOTE(review): /etc/redis/redis.conf.d/ is only read if redis.conf has a
    # matching "include" directive — confirm on the target distribution.
    sudo tee /etc/redis/redis.conf.d/hive-distributed.conf > /dev/null <<EOF
# Hive Distributed Workflow Configuration
maxmemory 512mb
maxmemory-policy allkeys-lru
save 900 1
save 300 10
save 60 10000
EOF
    # Restart Redis with new configuration
    sudo systemctl restart redis-server
    # Test Redis connection
    if redis-cli ping | grep -q "PONG"; then
        success "Redis configured and running"
    else
        error "Redis setup failed"
        exit 1
    fi
}
# Check cluster connectivity
# SSH-probes every node in CLUSTER_NODES. Failures are warnings only; the
# deployment continues with whichever nodes are reachable.
check_cluster_connectivity() {
    log "Checking cluster connectivity..."
    for i in "${!CLUSTER_NODES[@]}"; do
        node="${CLUSTER_NODES[$i]}"
        name="${CLUSTER_NAMES[$i]}"
        log "Testing connection to $name ($node)..."
        if sshpass -p "$SSH_PASS" ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no "$SSH_USER@$node" "echo 'Connection test successful'" > /dev/null 2>&1; then
            success "$name ($node) - Connected"
        else
            warning "$name ($node) - Connection failed"
        fi
    done
}

# Deploy configuration to cluster nodes
# Packs config/distributed_config.yaml into a tarball, scp's it to each node
# and unpacks it under ~/AI/projects/hive/config on the remote host.
deploy_cluster_config() {
    log "Deploying configuration to cluster nodes..."
    # Create configuration package
    cd "$PROJECT_ROOT"
    tar -czf /tmp/hive-distributed-config.tar.gz config/distributed_config.yaml
    for i in "${!CLUSTER_NODES[@]}"; do
        node="${CLUSTER_NODES[$i]}"
        name="${CLUSTER_NAMES[$i]}"
        log "Deploying to $name ($node)..."
        # Copy configuration
        sshpass -p "$SSH_PASS" scp -o StrictHostKeyChecking=no /tmp/hive-distributed-config.tar.gz "$SSH_USER@$node:/tmp/"
        # Extract and setup configuration (remote script runs under the SSH user)
        sshpass -p "$SSH_PASS" ssh -o StrictHostKeyChecking=no "$SSH_USER@$node" "
mkdir -p /home/$SSH_USER/AI/projects/hive/config
cd /home/$SSH_USER/AI/projects/hive/config
tar -xzf /tmp/hive-distributed-config.tar.gz
chmod 644 distributed_config.yaml
"
        success "✓ Configuration deployed to $name"
    done
    # Clean up
    rm -f /tmp/hive-distributed-config.tar.gz
}

# Update Ollama configurations for distributed workflows
# Writes a systemd drop-in on each node that raises Ollama's parallelism and
# queue limits and binds it to 0.0.0.0:11434, then restarts the service
# (restart failure is tolerated via "|| true").
update_ollama_configs() {
    log "Updating Ollama configurations for distributed workflows..."
    for i in "${!CLUSTER_NODES[@]}"; do
        node="${CLUSTER_NODES[$i]}"
        name="${CLUSTER_NAMES[$i]}"
        log "Updating Ollama on $name ($node)..."
        # Update Ollama service configuration for better distributed performance.
        # The heredoc terminator must stay at column 0 inside the remote script.
        sshpass -p "$SSH_PASS" ssh -o StrictHostKeyChecking=no "$SSH_USER@$node" "
# Create Ollama service override directory if it doesn't exist
sudo mkdir -p /etc/systemd/system/ollama.service.d/
# Create distributed workflow optimizations
sudo tee /etc/systemd/system/ollama.service.d/distributed.conf > /dev/null <<'OVERRIDE_EOF'
[Service]
Environment=\"OLLAMA_NUM_PARALLEL=4\"
Environment=\"OLLAMA_MAX_QUEUE=10\"
Environment=\"OLLAMA_KEEP_ALIVE=10m\"
Environment=\"OLLAMA_HOST=0.0.0.0:11434\"
OVERRIDE_EOF
# Reload systemd and restart Ollama
sudo systemctl daemon-reload
sudo systemctl restart ollama || true
"
        success "✓ Ollama updated on $name"
    done
}
# Start the distributed coordinator
# Activates the venv, initialises the database, then launches uvicorn in the
# background with nohup. The PID is written to /tmp/hive-distributed.pid so
# the "stop" subcommand can kill it later. Exits 1 if startup fails.
start_distributed_system() {
    log "Starting distributed workflow system..."
    cd "$PROJECT_ROOT/backend"
    source venv/bin/activate
    # Start the main Hive application with distributed workflows
    export PYTHONPATH="$PROJECT_ROOT/backend:$PYTHONPATH"
    export HIVE_CONFIG_PATH="$PROJECT_ROOT/config/distributed_config.yaml"
    # Run database migrations
    log "Running database migrations..."
    python -c "
from app.core.database import init_database_with_retry
init_database_with_retry()
print('Database initialized')
"
    # Start the application in the background
    # NOTE(review): --reload is a development flag; drop it for production runs.
    log "Starting Hive with distributed workflows..."
    nohup python -m uvicorn app.main:app \
        --host 0.0.0.0 \
        --port 8000 \
        --reload \
        --log-level info > /tmp/hive-distributed.log 2>&1 &
    HIVE_PID=$!
    echo $HIVE_PID > /tmp/hive-distributed.pid
    # Wait for startup
    sleep 10
    # Check if the service is running (kill -0 probes without signalling)
    if kill -0 $HIVE_PID 2>/dev/null; then
        success "Distributed workflow system started (PID: $HIVE_PID)"
        log "Application logs: tail -f /tmp/hive-distributed.log"
        log "Health check: curl http://localhost:8000/health"
        log "Distributed API: curl http://localhost:8000/api/distributed/cluster/status"
    else
        error "Failed to start distributed workflow system"
        exit 1
    fi
}

# Run health checks
# Probes the main API, the distributed API and Redis, then queries the
# cluster-status endpoint and counts healthy agents. Purely informational:
# it reports failures but never exits.
run_health_checks() {
    log "Running health checks..."
    # Wait for services to fully start
    sleep 15
    # Check main API
    if curl -s http://localhost:8000/health > /dev/null; then
        success "✓ Main API responding"
    else
        error "✗ Main API not responding"
    fi
    # Check distributed API
    if curl -s http://localhost:8000/api/distributed/cluster/status > /dev/null; then
        success "✓ Distributed API responding"
    else
        error "✗ Distributed API not responding"
    fi
    # Check Redis connection
    if redis-cli ping | grep -q "PONG"; then
        success "✓ Redis connection working"
    else
        error "✗ Redis connection failed"
    fi
    # Check cluster agent connectivity; healthy_agents defaults to 0 on any
    # parse error so the numeric comparison below is always safe.
    response=$(curl -s http://localhost:8000/api/distributed/cluster/status || echo "{}")
    healthy_agents=$(echo "$response" | python3 -c "
import sys, json
try:
    data = json.load(sys.stdin)
    print(data.get('healthy_agents', 0))
except:
    print(0)
" || echo "0")
    if [ "$healthy_agents" -gt 0 ]; then
        success "$healthy_agents cluster agents healthy"
    else
        warning "✗ No healthy cluster agents found"
    fi
}
# Create systemd service for production deployment
# Installs and enables hive-distributed.service (it does not start it —
# note the unit runs uvicorn WITHOUT --reload, unlike start_distributed_system).
create_systemd_service() {
    log "Creating systemd service for production deployment..."
    sudo tee /etc/systemd/system/hive-distributed.service > /dev/null <<EOF
[Unit]
Description=Hive Distributed Workflow System
After=network.target redis.service
Wants=redis.service
[Service]
Type=exec
User=$USER
Group=$USER
WorkingDirectory=$PROJECT_ROOT/backend
Environment=PYTHONPATH=$PROJECT_ROOT/backend
Environment=HIVE_CONFIG_PATH=$PROJECT_ROOT/config/distributed_config.yaml
ExecStart=$PROJECT_ROOT/backend/venv/bin/python -m uvicorn app.main:app --host 0.0.0.0 --port 8000
ExecReload=/bin/kill -HUP \$MAINPID
Restart=always
RestartSec=5
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target
EOF
    # Enable the service
    sudo systemctl daemon-reload
    sudo systemctl enable hive-distributed.service
    success "Systemd service created and enabled"
    log "Use 'sudo systemctl start hive-distributed' to start the service"
    log "Use 'sudo systemctl status hive-distributed' to check status"
}

# Generate deployment report
# Writes a human-readable summary (paths, endpoints, management commands,
# troubleshooting tips) to /tmp and echoes it to stdout.
generate_report() {
    log "Generating deployment report..."
    report_file="/tmp/hive-distributed-deployment-report.txt"
    cat > "$report_file" <<EOF
# Hive Distributed Workflow System - Deployment Report
Generated: $(date)
## Deployment Summary
- Project Directory: $PROJECT_ROOT
- Configuration: $PROJECT_ROOT/config/distributed_config.yaml
- Log File: /tmp/hive-distributed.log
- PID File: /tmp/hive-distributed.pid
## Cluster Configuration
EOF
    # Append one line per configured cluster node.
    for i in "${!CLUSTER_NODES[@]}"; do
        node="${CLUSTER_NODES[$i]}"
        name="${CLUSTER_NAMES[$i]}"
        echo "- $name: $node" >> "$report_file"
    done
    cat >> "$report_file" <<EOF
## Service Endpoints
- Main API: http://localhost:8000
- Health Check: http://localhost:8000/health
- API Documentation: http://localhost:8000/docs
- Distributed Workflows: http://localhost:8000/api/distributed/workflows
- Cluster Status: http://localhost:8000/api/distributed/cluster/status
- Performance Metrics: http://localhost:8000/api/distributed/performance/metrics
## Management Commands
- Start Service: sudo systemctl start hive-distributed
- Stop Service: sudo systemctl stop hive-distributed
- Restart Service: sudo systemctl restart hive-distributed
- View Logs: sudo journalctl -u hive-distributed -f
- View Application Logs: tail -f /tmp/hive-distributed.log
## Cluster Operations
- Check Cluster Status: curl http://localhost:8000/api/distributed/cluster/status
- Submit Workflow: POST to /api/distributed/workflows
- List Workflows: GET /api/distributed/workflows
- Optimize Cluster: POST to /api/distributed/cluster/optimize
## Troubleshooting
- Redis Status: sudo systemctl status redis-server
- Redis Connection: redis-cli ping
- Agent Connectivity: Check Ollama services on cluster nodes
- Application Health: curl http://localhost:8000/health
## Next Steps
1. Test distributed workflow submission
2. Monitor cluster performance metrics
3. Configure production security settings
4. Set up automated backups
5. Implement monitoring and alerting
EOF
    success "Deployment report generated: $report_file"
    cat "$report_file"
}
# Main deployment function
# Runs the full deployment pipeline in order; because of `set -e` any step
# that exits non-zero aborts the whole run.
main() {
    echo -e "${GREEN}"
    echo "╔══════════════════════════════════════════════════════════════╗"
    echo "║ Hive Distributed Workflow Deployment ║"
    echo "║ ║"
    echo "║ Deploying cluster-wide development workflow orchestration ║"
    echo "╚══════════════════════════════════════════════════════════════╝"
    echo -e "${NC}"
    log "Starting deployment of Hive Distributed Workflow System..."
    # Run deployment steps
    check_prerequisites
    install_dependencies
    setup_redis
    check_cluster_connectivity
    deploy_cluster_config
    update_ollama_configs
    start_distributed_system
    run_health_checks
    create_systemd_service
    generate_report
    echo -e "${GREEN}"
    echo "╔══════════════════════════════════════════════════════════════╗"
    echo "║ Deployment Completed! ║"
    echo "║ ║"
    echo "║ 🚀 Hive Distributed Workflow System is now running ║"
    echo "║ 📊 Visit http://localhost:8000/docs for API documentation ║"
    echo "║ 🌐 Cluster status: http://localhost:8000/api/distributed/ ║"
    echo "║ cluster/status ║"
    echo "╚══════════════════════════════════════════════════════════════╝"
    echo -e "${NC}"
}

# Handle script arguments.
# Subcommand dispatcher; defaults to "deploy" when no argument is given.
# "stop" also kills the nohup-launched process recorded in the PID file.
case "${1:-deploy}" in
    "deploy")
        main
        ;;
    "start")
        log "Starting Hive Distributed Workflow System..."
        sudo systemctl start hive-distributed
        ;;
    "stop")
        log "Stopping Hive Distributed Workflow System..."
        sudo systemctl stop hive-distributed
        if [ -f /tmp/hive-distributed.pid ]; then
            kill $(cat /tmp/hive-distributed.pid) 2>/dev/null || true
            rm -f /tmp/hive-distributed.pid
        fi
        ;;
    "status")
        log "Checking system status..."
        sudo systemctl status hive-distributed
        ;;
    "logs")
        log "Showing application logs..."
        tail -f /tmp/hive-distributed.log
        ;;
    "health")
        log "Running health checks..."
        run_health_checks
        ;;
    "cluster")
        log "Checking cluster status..."
        curl -s http://localhost:8000/api/distributed/cluster/status | python3 -m json.tool
        ;;
    *)
        echo "Usage: $0 {deploy|start|stop|status|logs|health|cluster}"
        echo ""
        echo "Commands:"
        echo "  deploy  - Full deployment of distributed workflow system"
        echo "  start   - Start the service"
        echo "  stop    - Stop the service"
        echo "  status  - Show service status"
        echo "  logs    - Show application logs"
        echo "  health  - Run health checks"
        echo "  cluster - Show cluster status"
        exit 1
        ;;
esac

View File

@@ -0,0 +1,669 @@
#!/usr/bin/env python3
"""
Comprehensive Testing Suite for Hive Distributed Workflows
Tests all aspects of the distributed development workflow system
"""
import asyncio
import aiohttp
import json
import time
import sys
import logging
from datetime import datetime
from typing import Dict, List, Any, Optional
from dataclasses import dataclass
import argparse
import traceback

# Configure module-wide logging: timestamped INFO-level messages.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
@dataclass
class TestResult:
    """Test result data class"""
    name: str        # human-readable test name
    success: bool    # True when the test passed
    duration: float  # wall-clock runtime in seconds
    message: str     # pass/fail summary or error text
    data: Optional[Dict[str, Any]] = None  # optional payload returned by the test
class DistributedWorkflowTester:
    """Comprehensive tester for distributed workflow system"""

    def __init__(self, base_url: str = "http://localhost:8000"):
        self.base_url = base_url
        # The HTTP session is created in __aenter__, so the tester must be
        # used as an async context manager.
        self.session: Optional[aiohttp.ClientSession] = None
        self.test_results: List[TestResult] = []  # accumulated results, in run order
        self.workflow_ids: List[str] = []         # workflow IDs created during testing

    async def __aenter__(self):
        """Async context manager entry"""
        self.session = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=300)  # 5 minute timeout
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit"""
        if self.session:
            await self.session.close()
    async def run_test(self, test_name: str, test_func, *args, **kwargs) -> TestResult:
        """Run a single test with error handling and timing.

        Normalises the test function's return value (bool, dict, or anything
        else) into a TestResult, appends it to self.test_results, and logs
        the outcome. Exceptions are caught and recorded as failures rather
        than propagated.
        """
        logger.info(f"🧪 Running test: {test_name}")
        start_time = time.time()
        try:
            result = await test_func(*args, **kwargs)
            duration = time.time() - start_time
            # Interpret the three supported return shapes.
            if isinstance(result, bool):
                success = result
                message = "Test passed" if success else "Test failed"
                data = None
            elif isinstance(result, dict):
                success = result.get('success', True)
                message = result.get('message', 'Test completed')
                data = result.get('data')
            else:
                success = True
                message = str(result)
                data = None
            test_result = TestResult(
                name=test_name,
                success=success,
                duration=duration,
                message=message,
                data=data
            )
            self.test_results.append(test_result)
            if success:
                logger.info(f"{test_name} - {message} ({duration:.2f}s)")
            else:
                logger.error(f"{test_name} - {message} ({duration:.2f}s)")
            return test_result
        except Exception as e:
            duration = time.time() - start_time
            error_message = f"Exception: {str(e)}"
            logger.error(f"💥 {test_name} - {error_message} ({duration:.2f}s)")
            logger.debug(traceback.format_exc())
            test_result = TestResult(
                name=test_name,
                success=False,
                duration=duration,
                message=error_message
            )
            self.test_results.append(test_result)
            return test_result
    async def test_system_health(self) -> Dict[str, Any]:
        """Test basic system health.

        GETs /health; fails on a non-200 status or when any reported
        component is neither 'operational' nor 'healthy'.
        """
        async with self.session.get(f"{self.base_url}/health") as response:
            if response.status != 200:
                return {
                    'success': False,
                    'message': f"Health check failed with status {response.status}"
                }
            health_data = await response.json()
            # Check component health
            components = health_data.get('components', {})
            unhealthy_components = [
                name for name, status in components.items()
                if status not in ['operational', 'healthy']
            ]
            if unhealthy_components:
                return {
                    'success': False,
                    'message': f"Unhealthy components: {unhealthy_components}",
                    'data': health_data
                }
            return {
                'success': True,
                'message': "All system components healthy",
                'data': health_data
            }

    async def test_cluster_status(self) -> Dict[str, Any]:
        """Test cluster status endpoint.

        Fails when the endpoint is non-200, when the cluster reports zero
        agents, or when none of the agents are healthy.
        """
        async with self.session.get(f"{self.base_url}/api/distributed/cluster/status") as response:
            if response.status != 200:
                return {
                    'success': False,
                    'message': f"Cluster status failed with status {response.status}"
                }
            cluster_data = await response.json()
            total_agents = cluster_data.get('total_agents', 0)
            healthy_agents = cluster_data.get('healthy_agents', 0)
            if total_agents == 0:
                return {
                    'success': False,
                    'message': "No agents found in cluster",
                    'data': cluster_data
                }
            if healthy_agents == 0:
                return {
                    'success': False,
                    'message': "No healthy agents in cluster",
                    'data': cluster_data
                }
            return {
                'success': True,
                'message': f"{healthy_agents}/{total_agents} agents healthy",
                'data': cluster_data
            }
    async def test_workflow_submission(self) -> Dict[str, Any]:
        """Test workflow submission.

        Submits one fixed workflow and records the returned workflow_id in
        self.workflow_ids so later tests (status tracking) can reuse it.
        """
        workflow_data = {
            "name": "Test REST API Development",
            "requirements": "Create a simple REST API with user authentication, CRUD operations for a todo list, and comprehensive error handling.",
            "context": "This is a test workflow to validate the distributed system functionality.",
            "language": "python",
            "priority": "high"
        }
        async with self.session.post(
            f"{self.base_url}/api/distributed/workflows",
            json=workflow_data
        ) as response:
            if response.status != 200:
                return {
                    'success': False,
                    'message': f"Workflow submission failed with status {response.status}"
                }
            result = await response.json()
            workflow_id = result.get('workflow_id')
            if not workflow_id:
                return {
                    'success': False,
                    'message': "No workflow_id returned",
                    'data': result
                }
            self.workflow_ids.append(workflow_id)
            return {
                'success': True,
                'message': f"Workflow submitted successfully: {workflow_id}",
                'data': result
            }

    async def test_workflow_status_tracking(self) -> Dict[str, Any]:
        """Test workflow status tracking.

        Polls the first submitted workflow every 5s for up to 2 minutes,
        recording each status snapshot. Succeeds as long as polling itself
        works — even if the workflow has not finished inside the window.
        """
        if not self.workflow_ids:
            return {
                'success': False,
                'message': "No workflows available for status tracking"
            }
        workflow_id = self.workflow_ids[0]
        # Poll workflow status for up to 2 minutes
        max_wait_time = 120  # 2 minutes
        poll_interval = 5  # 5 seconds
        start_time = time.time()
        status_changes = []
        while time.time() - start_time < max_wait_time:
            async with self.session.get(
                f"{self.base_url}/api/distributed/workflows/{workflow_id}"
            ) as response:
                if response.status != 200:
                    return {
                        'success': False,
                        'message': f"Status check failed with status {response.status}"
                    }
                status_data = await response.json()
                current_status = status_data.get('status', 'unknown')
                progress = status_data.get('progress', 0)
                status_changes.append({
                    'timestamp': datetime.now().isoformat(),
                    'status': current_status,
                    'progress': progress,
                    'completed_tasks': status_data.get('completed_tasks', 0),
                    'total_tasks': status_data.get('total_tasks', 0)
                })
                logger.info(f"Workflow {workflow_id}: {current_status} ({progress:.1f}%)")
                # Stop polling once a terminal state is reached.
                if current_status in ['completed', 'failed']:
                    break
            await asyncio.sleep(poll_interval)
        final_status = status_changes[-1] if status_changes else {}
        return {
            'success': True,
            'message': f"Status tracking completed. Final status: {final_status.get('status', 'unknown')}",
            'data': {
                'workflow_id': workflow_id,
                'status_changes': status_changes,
                'final_status': final_status
            }
        }
    async def test_multiple_workflow_submission(self) -> Dict[str, Any]:
        """Test concurrent workflow submission.

        Fires three POSTs in parallel via asyncio.gather; succeeds only when
        every submission returns 200 with a workflow_id. All created IDs are
        appended to self.workflow_ids.
        """
        workflows = [
            {
                "name": "Frontend React App",
                "requirements": "Create a React application with TypeScript, routing, and state management.",
                "language": "typescript",
                "priority": "normal"
            },
            {
                "name": "Python Data Analysis",
                "requirements": "Create a data analysis script with pandas, visualization, and reporting.",
                "language": "python",
                "priority": "normal"
            },
            {
                "name": "Microservice Architecture",
                "requirements": "Design a microservices system with API gateway and service discovery.",
                "language": "go",
                "priority": "high"
            }
        ]
        submission_tasks = []
        for workflow in workflows:
            task = self.session.post(
                f"{self.base_url}/api/distributed/workflows",
                json=workflow
            )
            submission_tasks.append(task)
        try:
            responses = await asyncio.gather(*submission_tasks)
            submitted_workflows = []
            for i, response in enumerate(responses):
                if response.status == 200:
                    result = await response.json()
                    workflow_id = result.get('workflow_id')
                    if workflow_id:
                        self.workflow_ids.append(workflow_id)
                        submitted_workflows.append({
                            'name': workflows[i]['name'],
                            'workflow_id': workflow_id
                        })
                # Responses are not used as context managers above, so each
                # one must be closed explicitly to release the connection.
                response.close()
            return {
                'success': len(submitted_workflows) == len(workflows),
                'message': f"Submitted {len(submitted_workflows)}/{len(workflows)} workflows concurrently",
                'data': {'submitted_workflows': submitted_workflows}
            }
        except Exception as e:
            return {
                'success': False,
                'message': f"Concurrent submission failed: {str(e)}"
            }
    async def test_workflow_cancellation(self) -> Dict[str, Any]:
        """Test workflow cancellation.

        Submits a fresh low-priority workflow, waits 2s so it can start, then
        POSTs to its /cancel endpoint. The pre-check on self.workflow_ids
        only verifies earlier submissions worked; the cancelled workflow is
        the one created here.
        """
        if not self.workflow_ids:
            return {
                'success': False,
                'message': "No workflows available for cancellation test"
            }
        # Submit a new workflow specifically for cancellation
        workflow_data = {
            "name": "Cancellation Test Workflow",
            "requirements": "This workflow will be cancelled during execution to test cancellation functionality.",
            "language": "python",
            "priority": "low"
        }
        async with self.session.post(
            f"{self.base_url}/api/distributed/workflows",
            json=workflow_data
        ) as response:
            if response.status != 200:
                return {
                    'success': False,
                    'message': "Failed to submit workflow for cancellation test"
                }
            result = await response.json()
            workflow_id = result.get('workflow_id')
            if not workflow_id:
                return {
                    'success': False,
                    'message': "No workflow_id returned for cancellation test"
                }
        # Wait a bit to let the workflow start
        await asyncio.sleep(2)
        # Cancel the workflow
        async with self.session.post(
            f"{self.base_url}/api/distributed/workflows/{workflow_id}/cancel"
        ) as response:
            if response.status != 200:
                return {
                    'success': False,
                    'message': f"Cancellation failed with status {response.status}"
                }
            cancel_result = await response.json()
            return {
                'success': True,
                'message': f"Workflow cancelled successfully: {workflow_id}",
                'data': cancel_result
            }
    async def test_performance_metrics(self) -> Dict[str, Any]:
        """Test performance metrics endpoint.

        Fails when the endpoint is non-200 or when any of the required
        top-level fields is missing from the payload.
        """
        async with self.session.get(f"{self.base_url}/api/distributed/performance/metrics") as response:
            if response.status != 200:
                return {
                    'success': False,
                    'message': f"Performance metrics failed with status {response.status}"
                }
            metrics_data = await response.json()
            required_fields = ['total_workflows', 'completed_workflows', 'agent_performance']
            missing_fields = [field for field in required_fields if field not in metrics_data]
            if missing_fields:
                return {
                    'success': False,
                    'message': f"Missing required metrics fields: {missing_fields}",
                    'data': metrics_data
                }
            return {
                'success': True,
                'message': "Performance metrics retrieved successfully",
                'data': metrics_data
            }

    async def test_cluster_optimization(self) -> Dict[str, Any]:
        """Test cluster optimization trigger.

        Only checks the POST is accepted (200); the optimization result
        itself is not validated.
        """
        async with self.session.post(f"{self.base_url}/api/distributed/cluster/optimize") as response:
            if response.status != 200:
                return {
                    'success': False,
                    'message': f"Cluster optimization failed with status {response.status}"
                }
            result = await response.json()
            return {
                'success': True,
                'message': "Cluster optimization triggered successfully",
                'data': result
            }

    async def test_workflow_listing(self) -> Dict[str, Any]:
        """Test workflow listing functionality.

        Verifies the listing endpoint returns 200 with a JSON list.
        """
        async with self.session.get(f"{self.base_url}/api/distributed/workflows") as response:
            if response.status != 200:
                return {
                    'success': False,
                    'message': f"Workflow listing failed with status {response.status}"
                }
            workflows = await response.json()
            if not isinstance(workflows, list):
                return {
                    'success': False,
                    'message': "Workflow listing should return a list"
                }
            return {
                'success': True,
                'message': f"Retrieved {len(workflows)} workflows",
                'data': {'workflow_count': len(workflows), 'workflows': workflows[:5]}  # First 5 for brevity
            }
    async def test_agent_health_monitoring(self) -> Dict[str, Any]:
        """Test individual agent health monitoring.

        Fetches the agent list from the cluster-status endpoint, then probes
        the per-agent tasks endpoint for up to three agents. Succeeds when at
        least one agent responds with 200.
        """
        # First get cluster status to get agent list
        async with self.session.get(f"{self.base_url}/api/distributed/cluster/status") as response:
            if response.status != 200:
                return {
                    'success': False,
                    'message': "Failed to get cluster status for agent testing"
                }
            cluster_data = await response.json()
            agents = cluster_data.get('agents', [])
        if not agents:
            return {
                'success': False,
                'message': "No agents found for health monitoring test"
            }
        # Test individual agent health
        agent_results = []
        for agent in agents[:3]:  # Test first 3 agents
            agent_id = agent.get('id')
            if agent_id:
                async with self.session.get(
                    f"{self.base_url}/api/distributed/agents/{agent_id}/tasks"
                ) as response:
                    agent_results.append({
                        'agent_id': agent_id,
                        'status_code': response.status,
                        'health_status': agent.get('health_status', 'unknown')
                    })
        successful_checks = sum(1 for result in agent_results if result['status_code'] == 200)
        return {
            'success': successful_checks > 0,
            'message': f"Agent health monitoring: {successful_checks}/{len(agent_results)} agents responding",
            'data': {'agent_results': agent_results}
        }
    async def run_comprehensive_test_suite(self) -> Dict[str, Any]:
        """Run the complete test suite.

        Executes every test in a fixed order (submission before tracking and
        cancellation, since those reuse workflow IDs), then logs and returns
        a summary dict with counts, success rate and total duration.
        """
        logger.info("🚀 Starting Comprehensive Distributed Workflow Test Suite")
        logger.info("=" * 60)
        # Define test sequence
        tests = [
            ("System Health Check", self.test_system_health),
            ("Cluster Status", self.test_cluster_status),
            ("Single Workflow Submission", self.test_workflow_submission),
            ("Multiple Workflow Submission", self.test_multiple_workflow_submission),
            ("Workflow Status Tracking", self.test_workflow_status_tracking),
            ("Workflow Cancellation", self.test_workflow_cancellation),
            ("Performance Metrics", self.test_performance_metrics),
            ("Cluster Optimization", self.test_cluster_optimization),
            ("Workflow Listing", self.test_workflow_listing),
            ("Agent Health Monitoring", self.test_agent_health_monitoring),
        ]
        # Run all tests
        for test_name, test_func in tests:
            await self.run_test(test_name, test_func)
            await asyncio.sleep(1)  # Brief pause between tests
        # Generate summary
        total_tests = len(self.test_results)
        passed_tests = sum(1 for result in self.test_results if result.success)
        failed_tests = total_tests - passed_tests
        total_duration = sum(result.duration for result in self.test_results)
        summary = {
            'total_tests': total_tests,
            'passed_tests': passed_tests,
            'failed_tests': failed_tests,
            'success_rate': (passed_tests / total_tests) * 100 if total_tests > 0 else 0,
            'total_duration': total_duration,
            'workflow_ids_created': self.workflow_ids
        }
        logger.info("=" * 60)
        logger.info("📊 Test Suite Summary:")
        logger.info(f"   Total Tests: {total_tests}")
        logger.info(f"   Passed: {passed_tests}")
        logger.info(f"   Failed: {failed_tests}")
        logger.info(f"   Success Rate: {summary['success_rate']:.1f}%")
        logger.info(f"   Total Duration: {total_duration:.2f}s")
        logger.info(f"   Workflows Created: {len(self.workflow_ids)}")
        if failed_tests > 0:
            logger.error("❌ Failed Tests:")
            for result in self.test_results:
                if not result.success:
                    logger.error(f"   - {result.name}: {result.message}")
        return summary
def generate_detailed_report(self) -> str:
"""Generate a detailed test report"""
report = []
report.append("# Hive Distributed Workflow System - Test Report")
report.append(f"Generated: {datetime.now().isoformat()}")
report.append("")
# Summary
total_tests = len(self.test_results)
passed_tests = sum(1 for result in self.test_results if result.success)
failed_tests = total_tests - passed_tests
total_duration = sum(result.duration for result in self.test_results)
report.append("## Test Summary")
report.append(f"- **Total Tests**: {total_tests}")
report.append(f"- **Passed**: {passed_tests}")
report.append(f"- **Failed**: {failed_tests}")
report.append(f"- **Success Rate**: {(passed_tests/total_tests)*100:.1f}%")
report.append(f"- **Total Duration**: {total_duration:.2f} seconds")
report.append(f"- **Workflows Created**: {len(self.workflow_ids)}")
report.append("")
# Detailed results
report.append("## Detailed Test Results")
for result in self.test_results:
status = "✅ PASS" if result.success else "❌ FAIL"
report.append(f"### {result.name} - {status}")
report.append(f"- **Duration**: {result.duration:.2f}s")
report.append(f"- **Message**: {result.message}")
if result.data:
report.append(f"- **Data**: ```json\n{json.dumps(result.data, indent=2)}\n```")
report.append("")
# Recommendations
report.append("## Recommendations")
if failed_tests == 0:
report.append("🎉 All tests passed! The distributed workflow system is functioning correctly.")
else:
report.append("⚠️ Some tests failed. Please review the failed tests and address any issues.")
report.append("")
report.append("### Failed Tests:")
for result in self.test_results:
if not result.success:
report.append(f"- **{result.name}**: {result.message}")
return "\n".join(report)
async def main():
    """Main test execution function.

    Parses CLI arguments, runs either a single named test or the full suite,
    optionally writes a detailed report, and returns a process exit code
    (0 on success, 1 on any failure or interruption).
    """
    parser = argparse.ArgumentParser(description="Test Hive Distributed Workflow System")
    parser.add_argument(
        "--url",
        default="http://localhost:8000",
        help="Base URL for the Hive API (default: http://localhost:8000)"
    )
    parser.add_argument(
        "--output",
        help="Output file for detailed test report"
    )
    parser.add_argument(
        "--single-test",
        help="Run a single test by name"
    )
    args = parser.parse_args()
    try:
        async with DistributedWorkflowTester(args.url) as tester:
            if args.single_test:
                # Run single test: short alias -> bound test coroutine
                test_methods = {
                    'health': tester.test_system_health,
                    'cluster': tester.test_cluster_status,
                    'submit': tester.test_workflow_submission,
                    'multiple': tester.test_multiple_workflow_submission,
                    'status': tester.test_workflow_status_tracking,
                    'cancel': tester.test_workflow_cancellation,
                    'metrics': tester.test_performance_metrics,
                    'optimize': tester.test_cluster_optimization,
                    'list': tester.test_workflow_listing,
                    'agents': tester.test_agent_health_monitoring,
                }
                if args.single_test in test_methods:
                    await tester.run_test(args.single_test, test_methods[args.single_test])
                else:
                    logger.error(f"Unknown test: {args.single_test}")
                    logger.info(f"Available tests: {', '.join(test_methods.keys())}")
                    return 1
            else:
                # Run full test suite
                summary = await tester.run_comprehensive_test_suite()
            # Generate and save report if requested
            if args.output:
                report = tester.generate_detailed_report()
                with open(args.output, 'w') as f:
                    f.write(report)
                logger.info(f"📄 Detailed report saved to: {args.output}")
            # Return appropriate exit code; `summary` is only bound on the
            # full-suite path, which this branch structure guarantees.
            if args.single_test:
                return 0 if tester.test_results[-1].success else 1
            else:
                return 0 if summary['failed_tests'] == 0 else 1
    except KeyboardInterrupt:
        logger.info("❌ Test execution interrupted by user")
        return 1
    except Exception as e:
        logger.error(f"💥 Test execution failed: {str(e)}")
        logger.debug(traceback.format_exc())
        return 1
if __name__ == "__main__":
    # Run the async entry point and propagate its exit status to the shell.
    sys.exit(asyncio.run(main()))