Major WHOOSH system refactoring and feature enhancements
- Migrated from HIVE branding to WHOOSH across all components
- Enhanced backend API with new services: AI models, BZZZ integration, templates, members
- Added comprehensive testing suite with security, performance, and integration tests
- Improved frontend with new components for project setup, AI models, and team management
- Updated MCP server implementation with WHOOSH-specific tools and resources
- Enhanced deployment configurations with production-ready Docker setups
- Added comprehensive documentation and setup guides
- Implemented age encryption service and UCXL integration

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
237
backend/app/api/cluster_setup.py
Normal file
237
backend/app/api/cluster_setup.py
Normal file
@@ -0,0 +1,237 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Cluster Setup API Endpoints for WHOOSH
|
||||
Provides REST API for cluster infrastructure setup and BZZZ deployment
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, List, Any, Optional
|
||||
from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..services.cluster_setup_service import cluster_setup_service
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Router that collects every endpoint defined in this module under the
# "/cluster-setup" prefix and the "cluster-setup" tag.
router = APIRouter(
    prefix="/cluster-setup",
    tags=["cluster-setup"],
)
|
||||
|
||||
# Request/Response Models
|
||||
# Request model describing one cluster node: how to reach it over SSH and
# which role it plays. Documented with comments rather than a docstring so
# the generated JSON schema is unchanged.
class NodeConfiguration(BaseModel):
    # Required connection identity.
    hostname: str = Field(default=..., description="Node hostname")
    ip_address: str = Field(default=..., description="Node IP address")
    ssh_user: str = Field(default=..., description="SSH username")

    # Optional SSH settings; the port defaults to 22 and both credentials
    # default to None.
    ssh_port: int = Field(default=22, description="SSH port")
    ssh_key_path: Optional[str] = Field(
        default=None,
        description="Path to SSH private key",
    )
    ssh_password: Optional[str] = Field(
        default=None,
        description="SSH password (if not using keys)",
    )

    # Free-form role string; the description lists the expected values but
    # nothing here validates them.
    role: str = Field(
        default="worker",
        description="Node role: coordinator, worker, storage",
    )
|
||||
|
||||
# Request body for the infrastructure configuration endpoint: the required
# list of nodes that make up the cluster.
class InfrastructureConfigRequest(BaseModel):
    nodes: List[NodeConfiguration] = Field(
        default=...,
        description="List of cluster nodes",
    )
|
||||
|
||||
# Request body for the model selection endpoint: the required list of model
# names chosen for the cluster.
class ModelSelectionRequest(BaseModel):
    model_names: List[str] = Field(
        default=...,
        description="List of selected model names",
    )
|
||||
|
||||
# Request body for the first-agent deployment endpoint: the required hostname
# of the coordinator node.
class AgentDeploymentRequest(BaseModel):
    coordinator_hostname: str = Field(
        default=...,
        description="Hostname of coordinator node",
    )
|
||||
|
||||
# API Endpoints
|
||||
|
||||
# Fetches the current status from the cluster setup service and returns it in
# a {"success", "data"} envelope. Any exception is logged and re-raised as an
# HTTP 500 whose detail is the exception text.
@router.get("/status")
async def get_setup_status() -> Dict[str, Any]:
    """Get current cluster setup status and progress"""
    try:
        logger.info("🔍 Getting cluster setup status")
        setup_status = await cluster_setup_service.get_setup_status()

        # The log line reads the 'next_step' entry; if it is missing, the
        # lookup error is handled by the except clause below.
        logger.info(f"📊 Cluster setup status: {setup_status['next_step']}")

        envelope = {"success": True, "data": setup_status}
        return envelope
    except Exception as exc:
        logger.error(f"❌ Error getting setup status: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
# Asks the cluster setup service for the available models and returns them
# together with their count. Any exception is logged and re-raised as an
# HTTP 500 whose detail is the exception text.
@router.get("/models/available")
async def get_available_models() -> Dict[str, Any]:
    """Get list of available models from ollama.com registry"""
    try:
        logger.info("📋 Fetching available models from registry")
        registry_models = await cluster_setup_service.fetch_ollama_models()

        payload = {
            "models": registry_models,
            "count": len(registry_models),
        }
        return {"success": True, "data": payload}
    except Exception as exc:
        logger.error(f"❌ Error fetching available models: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
# Passes the requested nodes to the cluster setup service and returns the
# service result, mirroring its "success" flag at the top level. Any
# exception is logged and re-raised as an HTTP 500.
@router.post("/infrastructure/configure")
async def configure_infrastructure(request: InfrastructureConfigRequest) -> Dict[str, Any]:
    """Configure cluster infrastructure with node connectivity testing"""
    try:
        logger.info(f"🏗️ Configuring infrastructure with {len(request.nodes)} nodes")

        # Convert the Pydantic node models to plain dicts before handing them
        # to the service.
        node_dicts = [entry.model_dump() for entry in request.nodes]
        outcome = await cluster_setup_service.configure_infrastructure(node_dicts)

        succeeded = outcome["success"]
        if not succeeded:
            logger.error(f"❌ Infrastructure configuration failed: {outcome.get('error')}")
        else:
            logger.info(
                f"✅ Infrastructure configured: {outcome['nodes_accessible']}/{outcome['nodes_configured']} nodes accessible"
            )

        return {"success": succeeded, "data": outcome}
    except Exception as exc:
        logger.error(f"❌ Error configuring infrastructure: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
# Asks the cluster setup service to generate Age keys and returns the service
# result, mirroring its "success" flag at the top level. Any exception is
# logged and re-raised as an HTTP 500.
@router.post("/keys/generate")
async def generate_age_keys() -> Dict[str, Any]:
    """Generate Age encryption keys for secure P2P communication"""
    try:
        logger.info("🔐 Generating Age encryption keys")
        outcome = await cluster_setup_service.generate_age_keys()

        succeeded = outcome["success"]
        if not succeeded:
            logger.error(f"❌ Age key generation failed: {outcome.get('error')}")
        else:
            logger.info("✅ Age keys generated successfully")

        return {"success": succeeded, "data": outcome}
    except Exception as exc:
        logger.error(f"❌ Error generating age keys: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
# Forwards the chosen model names to the cluster setup service and returns
# the service result, mirroring its "success" flag at the top level. Any
# exception is logged and re-raised as an HTTP 500.
@router.post("/models/select")
async def select_models(request: ModelSelectionRequest) -> Dict[str, Any]:
    """Select models for cluster deployment"""
    try:
        logger.info(f"📦 Selecting {len(request.model_names)} models for cluster")
        outcome = await cluster_setup_service.select_models(request.model_names)

        succeeded = outcome["success"]
        if not succeeded:
            logger.error(f"❌ Model selection failed: {outcome.get('error')}")
        else:
            logger.info(f"✅ Models selected: {request.model_names}")

        return {"success": succeeded, "data": outcome}
    except Exception as exc:
        logger.error(f"❌ Error selecting models: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
# Asks the cluster setup service to deploy the first agent to the requested
# coordinator host and returns the service result, mirroring its "success"
# flag at the top level. Any exception is logged and re-raised as an HTTP 500.
@router.post("/agent/deploy-first")
async def deploy_first_agent(
    request: AgentDeploymentRequest,
    background_tasks: BackgroundTasks,
) -> Dict[str, Any]:
    """Deploy the first BZZZ agent and pull selected models"""
    try:
        logger.info(f"🚀 Deploying first BZZZ agent to {request.coordinator_hostname}")

        # The deployment is awaited inline even though it can take a long
        # time; `background_tasks` is accepted but not used here, so moving
        # the work to the background remains an option.
        outcome = await cluster_setup_service.deploy_first_agent(
            request.coordinator_hostname
        )

        succeeded = outcome["success"]
        if not succeeded:
            logger.error(f"❌ First agent deployment failed: {outcome.get('error')}")
        else:
            logger.info(
                f"✅ First agent deployed successfully to {request.coordinator_hostname}"
            )

        return {"success": succeeded, "data": outcome}
    except Exception as exc:
        logger.error(f"❌ Error deploying first agent: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
# Asks the cluster setup service to initialize the whole cluster and returns
# the service result, mirroring its "success" flag at the top level. Any
# exception is logged and re-raised as an HTTP 500.
@router.post("/cluster/initialize")
async def initialize_cluster(background_tasks: BackgroundTasks) -> Dict[str, Any]:
    """Initialize the complete cluster with P2P model distribution"""
    try:
        logger.info("🌐 Initializing complete cluster")

        # Long-running call that is awaited inline; `background_tasks` is
        # accepted but not used here, so a background task is still worth
        # considering.
        outcome = await cluster_setup_service.initialize_cluster()

        succeeded = outcome["success"]
        if not succeeded:
            logger.error(f"❌ Cluster initialization failed: {outcome.get('error')}")
        else:
            logger.info(
                f"✅ Cluster initialized: {outcome['successful_deployments']}/{outcome['cluster_nodes']} nodes"
            )

        return {"success": succeeded, "data": outcome}
    except Exception as exc:
        logger.error(f"❌ Error initializing cluster: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
# Replaces the setup service's `setup_state` with a freshly constructed state
# object and returns a confirmation message. Any exception is logged and
# re-raised as an HTTP 500 whose detail is the exception text.
@router.post("/reset")
async def reset_setup() -> Dict[str, Any]:
    """Reset cluster setup state (for development/testing)"""
    try:
        logger.info("🔄 Resetting cluster setup state")

        # Use a ClusterSetupState class exposed on the service class when
        # there is one (the original lookup). Otherwise rebuild the state
        # from the type of the current state object, so the reset does not
        # fail with AttributeError when the state class is not an attribute
        # of the service class.
        state_factory = getattr(
            type(cluster_setup_service), "ClusterSetupState", None
        )
        if state_factory is None:
            state_factory = type(cluster_setup_service.setup_state)
        cluster_setup_service.setup_state = state_factory()

        logger.info("✅ Cluster setup state reset")
        return {
            "success": True,
            "message": "Cluster setup state has been reset",
        }
    except Exception as e:
        logger.error(f"❌ Error resetting setup: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
# Health check for the setup service
|
||||
# Reports whether the cluster setup service is usable. Unlike the other
# endpoints in this module, a failure is returned as an "unhealthy" payload
# instead of being raised as an HTTP error.
@router.get("/health")
async def health_check() -> Dict[str, Any]:
    """Health check for cluster setup service"""
    try:
        # Initialize the service on demand when it has no session yet
        # (attribute missing or set to None).
        if getattr(cluster_setup_service, 'session', None) is None:
            await cluster_setup_service.initialize()

        report = {
            "success": True,
            "service": "cluster_setup",
            "status": "healthy",
            "initialized": cluster_setup_service.session is not None,
        }
        return report
    except Exception as exc:
        logger.error(f"❌ Health check failed: {exc}")
        return {
            "success": False,
            "service": "cluster_setup",
            "status": "unhealthy",
            "error": str(exc),
        }
|
||||
Reference in New Issue
Block a user