Files
hive/backend/app/api/cluster_setup.py
anthonyrawlins 268214d971 Major WHOOSH system refactoring and feature enhancements
- Migrated from HIVE branding to WHOOSH across all components
- Enhanced backend API with new services: AI models, BZZZ integration, templates, members
- Added comprehensive testing suite with security, performance, and integration tests
- Improved frontend with new components for project setup, AI models, and team management
- Updated MCP server implementation with WHOOSH-specific tools and resources
- Enhanced deployment configurations with production-ready Docker setups
- Added comprehensive documentation and setup guides
- Implemented age encryption service and UCXL integration

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-27 08:34:48 +10:00

237 lines
8.5 KiB
Python

#!/usr/bin/env python3
"""
Cluster Setup API Endpoints for WHOOSH
Provides REST API for cluster infrastructure setup and BZZZ deployment
"""
import logging
from typing import Dict, List, Any, Optional
from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks
from pydantic import BaseModel, Field
from ..services.cluster_setup_service import cluster_setup_service
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Router that collects every endpoint declared in this file; all routes are
# mounted under the "/cluster-setup" prefix and tagged "cluster-setup".
router = APIRouter(prefix="/cluster-setup", tags=["cluster-setup"])
# Request/Response Models
class NodeConfiguration(BaseModel):
    # Connection details for a single cluster node, as submitted by the client.
    # (Documented with comments rather than a class docstring so the generated
    # JSON schema for this model is not altered.)

    # Required identity / addressing fields.
    hostname: str = Field(..., description="Node hostname")
    ip_address: str = Field(..., description="Node IP address")
    ssh_user: str = Field(..., description="SSH username")
    # Constrained to the valid TCP port range so an impossible port is rejected
    # at request validation instead of surfacing later as a connection failure.
    ssh_port: int = Field(default=22, ge=1, le=65535, description="SSH port")
    # Credentials: both are optional at the schema level.
    ssh_key_path: Optional[str] = Field(None, description="Path to SSH private key")
    ssh_password: Optional[str] = Field(None, description="SSH password (if not using keys)")
    # Free-form string; the description lists the expected values but nothing
    # here enforces them.
    role: str = Field(default="worker", description="Node role: coordinator, worker, storage")
class InfrastructureConfigRequest(BaseModel):
    # Request body for POST /infrastructure/configure: the nodes to set up.
    nodes: List[NodeConfiguration] = Field(
        ...,
        description="List of cluster nodes",
    )
class ModelSelectionRequest(BaseModel):
    # Request body for POST /models/select: names of the chosen models.
    model_names: List[str] = Field(
        ...,
        description="List of selected model names",
    )
class AgentDeploymentRequest(BaseModel):
    # Request body for POST /agent/deploy-first: which node gets the first agent.
    coordinator_hostname: str = Field(
        ...,
        description="Hostname of coordinator node",
    )
# API Endpoints
@router.get("/status")
async def get_setup_status() -> Dict[str, Any]:
    """Get current cluster setup status and progress"""
    # Returns {"success": True, "data": <status>} where <status> is whatever
    # cluster_setup_service.get_setup_status() produced.
    # Raises HTTPException(500) carrying the error text on any failure.
    try:
        logger.info("🔍 Getting cluster setup status")
        status = await cluster_setup_service.get_setup_status()
        # 'next_step' is only needed for this log line, so read it with .get():
        # a status payload without that key must not fail the whole request.
        logger.info(f"📊 Cluster setup status: {status.get('next_step')}")
        return {
            "success": True,
            "data": status
        }
    except Exception as exc:
        # logger.exception keeps the same message but also records the traceback.
        logger.exception(f"❌ Error getting setup status: {exc}")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
@router.get("/models/available")
async def get_available_models() -> Dict[str, Any]:
    """Get list of available models from ollama.com registry"""
    # Response shape: {"success": True, "data": {"models": [...], "count": N}}.
    # Any exception is logged and re-raised as an HTTP 500.
    try:
        logger.info("📋 Fetching available models from registry")
        registry_models = await cluster_setup_service.fetch_ollama_models()
        payload = {
            "models": registry_models,
            "count": len(registry_models),
        }
        return {"success": True, "data": payload}
    except Exception as exc:
        logger.error(f"❌ Error fetching available models: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@router.post("/infrastructure/configure")
async def configure_infrastructure(request: InfrastructureConfigRequest) -> Dict[str, Any]:
    """Configure cluster infrastructure with node connectivity testing"""
    # request: validated body holding the list of nodes to configure.
    # Returns {"success": <service success flag>, "data": <service result>}.
    # Raises HTTPException(500) carrying the error text on any failure.
    try:
        logger.info(f"🏗️ Configuring infrastructure with {len(request.nodes)} nodes")
        # The service is handed plain dicts, not Pydantic model instances.
        nodes_data = [node.model_dump() for node in request.nodes]
        result = await cluster_setup_service.configure_infrastructure(nodes_data)
        if result["success"]:
            # The node counters are used for logging only; read them with .get()
            # so a result lacking them cannot turn a successful run into a 500.
            logger.info(
                f"✅ Infrastructure configured: "
                f"{result.get('nodes_accessible')}/{result.get('nodes_configured')} nodes accessible"
            )
        else:
            logger.error(f"❌ Infrastructure configuration failed: {result.get('error')}")
        return {
            "success": result["success"],
            "data": result
        }
    except Exception as exc:
        # logger.exception keeps the same message but also records the traceback.
        logger.exception(f"❌ Error configuring infrastructure: {exc}")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
@router.post("/keys/generate")
async def generate_age_keys() -> Dict[str, Any]:
    """Generate Age encryption keys for secure P2P communication"""
    # Delegates to the setup service and mirrors its "success" flag in the
    # response envelope; unexpected exceptions become an HTTP 500.
    try:
        logger.info("🔐 Generating Age encryption keys")
        outcome = await cluster_setup_service.generate_age_keys()
        succeeded = outcome["success"]
        if not succeeded:
            logger.error(f"❌ Age key generation failed: {outcome.get('error')}")
        else:
            logger.info("✅ Age keys generated successfully")
        return {"success": succeeded, "data": outcome}
    except Exception as exc:
        logger.error(f"❌ Error generating age keys: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@router.post("/models/select")
async def select_models(request: ModelSelectionRequest) -> Dict[str, Any]:
    """Select models for cluster deployment"""
    # request: validated body carrying the chosen model names.
    # The service result is returned under "data" with its "success" flag
    # copied to the top level; unexpected exceptions become an HTTP 500.
    try:
        chosen = request.model_names
        logger.info(f"📦 Selecting {len(chosen)} models for cluster")
        outcome = await cluster_setup_service.select_models(chosen)
        succeeded = outcome["success"]
        if not succeeded:
            logger.error(f"❌ Model selection failed: {outcome.get('error')}")
        else:
            logger.info(f"✅ Models selected: {chosen}")
        return {"success": succeeded, "data": outcome}
    except Exception as exc:
        logger.error(f"❌ Error selecting models: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@router.post("/agent/deploy-first")
async def deploy_first_agent(
    request: AgentDeploymentRequest,
    background_tasks: BackgroundTasks
) -> Dict[str, Any]:
    """Deploy the first BZZZ agent and pull selected models"""
    # request: validated body naming the coordinator host.
    # background_tasks: accepted but not used; the deployment is awaited inline.
    try:
        target_host = request.coordinator_hostname
        logger.info(f"🚀 Deploying first BZZZ agent to {target_host}")
        # This can take a long time, so we could optionally run it in background
        outcome = await cluster_setup_service.deploy_first_agent(target_host)
        succeeded = outcome["success"]
        if not succeeded:
            logger.error(f"❌ First agent deployment failed: {outcome.get('error')}")
        else:
            logger.info(f"✅ First agent deployed successfully to {target_host}")
        return {"success": succeeded, "data": outcome}
    except Exception as exc:
        logger.error(f"❌ Error deploying first agent: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@router.post("/cluster/initialize")
async def initialize_cluster(background_tasks: BackgroundTasks) -> Dict[str, Any]:
    """Initialize the complete cluster with P2P model distribution"""
    # background_tasks: accepted but not used; initialization is awaited inline.
    # Returns {"success": <service success flag>, "data": <service result>}.
    # Raises HTTPException(500) carrying the error text on any failure.
    try:
        logger.info("🌐 Initializing complete cluster")
        # This definitely takes a long time, consider background task
        result = await cluster_setup_service.initialize_cluster()
        if result["success"]:
            # The deployment counters are used for logging only; read them with
            # .get() so a result lacking them cannot turn a successful
            # initialization into a 500.
            logger.info(
                f"✅ Cluster initialized: "
                f"{result.get('successful_deployments')}/{result.get('cluster_nodes')} nodes"
            )
        else:
            logger.error(f"❌ Cluster initialization failed: {result.get('error')}")
        return {
            "success": result["success"],
            "data": result
        }
    except Exception as exc:
        # logger.exception keeps the same message but also records the traceback.
        logger.exception(f"❌ Error initializing cluster: {exc}")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
@router.post("/reset")
async def reset_setup() -> Dict[str, Any]:
    """Reset cluster setup state (for development/testing)"""
    # Replaces the service's setup_state with a freshly constructed state object.
    # Returns a success envelope with a confirmation message.
    # Raises HTTPException(500) carrying the error text on any failure.
    try:
        logger.info("🔄 Resetting cluster setup state")
        # First try the lookup the endpoint has always used: a ClusterSetupState
        # attribute on the service's class.
        # NOTE(review): the service class is not visible from this file. If
        # ClusterSetupState is a module-level class rather than an attribute of
        # the service class, that lookup raises AttributeError and the reset can
        # never succeed, so fall back to the type of the current state object.
        state_cls = getattr(type(cluster_setup_service), "ClusterSetupState", None)
        if state_cls is None:
            state_cls = type(cluster_setup_service.setup_state)
        cluster_setup_service.setup_state = state_cls()
        logger.info("✅ Cluster setup state reset")
        return {
            "success": True,
            "message": "Cluster setup state has been reset"
        }
    except Exception as exc:
        # logger.exception keeps the same message but also records the traceback.
        logger.exception(f"❌ Error resetting setup: {exc}")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
# Health check for the setup service
@router.get("/health")
async def health_check() -> Dict[str, Any]:
    """Health check for cluster setup service"""
    # Unlike the other endpoints, a failure here is reported in the response
    # body ("status": "unhealthy") instead of being raised as an HTTP error.
    try:
        # Initialize if not already done (attribute absent or still None).
        if getattr(cluster_setup_service, 'session', None) is None:
            await cluster_setup_service.initialize()
        has_session = cluster_setup_service.session is not None
        return {
            "success": True,
            "service": "cluster_setup",
            "status": "healthy",
            "initialized": has_session,
        }
    except Exception as exc:
        logger.error(f"❌ Health check failed: {exc}")
        failure_report = {
            "success": False,
            "service": "cluster_setup",
            "status": "unhealthy",
            "error": str(exc),
        }
        return failure_report