- Migrated from HIVE branding to WHOOSH across all components - Enhanced backend API with new services: AI models, BZZZ integration, templates, members - Added comprehensive testing suite with security, performance, and integration tests - Improved frontend with new components for project setup, AI models, and team management - Updated MCP server implementation with WHOOSH-specific tools and resources - Enhanced deployment configurations with production-ready Docker setups - Added comprehensive documentation and setup guides - Implemented age encryption service and UCXL integration 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
651 lines
27 KiB
Python
651 lines
27 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Cluster Setup Service for WHOOSH
|
|
Handles initial cluster setup, infrastructure discovery, and BZZZ agent deployment
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import aiohttp
|
|
import asyncssh
|
|
from typing import Dict, List, Optional, Any
|
|
from datetime import datetime
|
|
from dataclasses import dataclass, asdict
|
|
from pathlib import Path
|
|
import subprocess
|
|
import tempfile
|
|
|
|
# Module-level logger named after this module; ClusterSetupService logs through it.
logger = logging.getLogger(__name__)
|
|
|
|
@dataclass
class ClusterNode:
    """Connection details and discovered state for one cluster machine.

    ``capabilities`` and ``ollama_models`` default to ``None`` and are replaced
    with a fresh empty list in ``__post_init__``, so instances never share a
    mutable default. A list passed in explicitly is kept as-is.
    """

    hostname: str
    ip_address: str
    ssh_user: str
    ssh_port: int = 22
    ssh_key_path: Optional[str] = None
    ssh_password: Optional[str] = None
    role: str = "worker"  # coordinator, worker, storage
    status: str = "pending"  # pending, connecting, ready, error
    capabilities: Optional[List[str]] = None
    ollama_models: Optional[List[str]] = None

    def __post_init__(self):
        """Swap every list field left as ``None`` for its own empty list."""
        for list_field in ("capabilities", "ollama_models"):
            if getattr(self, list_field) is None:
                setattr(self, list_field, [])
|
|
|
|
@dataclass
class ClusterSetupState:
    """Progress flags and collected data for the cluster setup workflow.

    The container fields default to ``None`` and receive their own empty
    list/dict in ``__post_init__`` so state objects never share containers.
    """

    infrastructure_configured: bool = False
    age_keys_generated: bool = False
    models_selected: bool = False
    first_agent_deployed: bool = False
    cluster_initialized: bool = False
    nodes: Optional[List[ClusterNode]] = None
    selected_models: Optional[List[str]] = None
    age_keys: Optional[Dict[str, str]] = None

    def __post_init__(self):
        """Give each container field left as ``None`` a new empty container."""
        container_defaults = (
            ("nodes", list),
            ("selected_models", list),
            ("age_keys", dict),
        )
        for attr, factory in container_defaults:
            if getattr(self, attr) is None:
                setattr(self, attr, factory())
|
|
|
|
class ClusterSetupService:
|
|
"""
|
|
Service for setting up the WHOOSH distributed cluster infrastructure.
|
|
Handles infrastructure discovery, age key generation, model selection, and BZZZ deployment.
|
|
"""
|
|
|
|
def __init__(self):
    """Start with no HTTP session and a blank setup state."""
    # The aiohttp session stays None until initialize() creates it.
    self.session: Optional[aiohttp.ClientSession] = None
    self.setup_state = ClusterSetupState()
|
|
|
|
async def initialize(self) -> bool:
    """Open the shared HTTP session and probe for an existing cluster.

    Returns:
        True when start-up completed, False if any step raised. The error is
        logged, not propagated.
    """
    try:
        logger.info("🚀 Initializing Cluster Setup Service")

        # One session with a 30 second total timeout per request.
        request_timeout = aiohttp.ClientTimeout(total=30)
        self.session = aiohttp.ClientSession(timeout=request_timeout)

        # Check if cluster is already set up
        await self._detect_existing_cluster()

        logger.info("✅ Cluster Setup Service initialized")
    except Exception as exc:
        logger.error(f"❌ Failed to initialize cluster setup service: {exc}")
        return False
    return True
|
|
|
|
async def _detect_existing_cluster(self) -> None:
    """Probe the known agent endpoints and adopt any cluster that answers.

    Each endpoint that returns HTTP 200 for ``/api/agent/status`` becomes a
    ``ClusterNode`` with status "ready". If at least one responds, the setup
    state is flagged as configured, deployed and initialized. Failures are
    logged and never raised.
    """
    try:
        # Check for existing BZZZ agents on known endpoints
        known_endpoints = [
            # Direct BZZZ connections disabled - WHOOSH should use BZZZ API instead
            # "http://192.168.1.27:8080",   # walnut
            # "http://192.168.1.72:8080",   # acacia
            # "http://192.168.1.113:8080",  # ironwood
            # "http://192.168.1.106:8080",  # oak
        ]

        active_nodes = []
        for endpoint in known_endpoints:
            try:
                status_url = f"{endpoint}/api/agent/status"
                probe_timeout = aiohttp.ClientTimeout(total=5)
                async with self.session.get(status_url, timeout=probe_timeout) as response:
                    if response.status != 200:
                        continue

                    data = await response.json()
                    # "http://host:port" -> "host"
                    host = endpoint.split("//")[1].split(":")[0]
                    active_nodes.append(
                        ClusterNode(
                            hostname=data.get("hostname", host),
                            ip_address=host,
                            ssh_user="auto-detected",
                            status="ready",
                            capabilities=data.get("capabilities", []),
                            ollama_models=data.get("models", []),
                        )
                    )
                    logger.info(f"🔍 Detected active BZZZ agent: {endpoint}")
            except Exception as probe_error:
                # An unreachable endpoint is expected; keep scanning.
                logger.debug(f"No BZZZ agent at {endpoint}: {probe_error}")

        if not active_nodes:
            logger.info("🆕 No existing cluster detected - fresh setup required")
        else:
            state = self.setup_state
            state.nodes = active_nodes
            state.infrastructure_configured = True
            state.first_agent_deployed = True
            state.cluster_initialized = True
            logger.info(f"🎯 Detected existing cluster with {len(active_nodes)} nodes")

    except Exception as exc:
        logger.error(f"❌ Error detecting existing cluster: {exc}")
|
|
|
|
async def get_setup_status(self) -> Dict[str, Any]:
    """Return a snapshot of the setup workflow for status consumers.

    Returns:
        A dict holding the progress flags from ``self.setup_state``, the node
        list (each node converted with ``dataclasses.asdict``), the selected
        model names, and ``next_step`` as reported by
        ``_get_next_setup_step``. ``cluster_exists`` mirrors
        ``cluster_initialized``.

        Any stored ``ssh_password`` is masked in the returned node dicts; the
        key itself is kept so the payload shape does not change. The state
        objects are not modified.
    """
    state = self.setup_state

    node_views = []
    for node in state.nodes:
        node_view = asdict(node)
        # Credentials must not leave the service through a status payload.
        if node_view.get("ssh_password"):
            node_view["ssh_password"] = "********"
        node_views.append(node_view)

    return {
        "cluster_exists": state.cluster_initialized,
        "infrastructure_configured": state.infrastructure_configured,
        "age_keys_generated": state.age_keys_generated,
        "models_selected": state.models_selected,
        "first_agent_deployed": state.first_agent_deployed,
        "cluster_initialized": state.cluster_initialized,
        "nodes": node_views,
        "selected_models": state.selected_models,
        "next_step": self._get_next_setup_step(),
    }
|
|
|
|
def _get_next_setup_step(self) -> str:
    """Name the first setup stage that has not been completed yet.

    Stages are checked in workflow order against the flags on
    ``self.setup_state``; "complete" is returned once every flag is truthy.
    """
    ordered_stages = (
        ("infrastructure_configured", "configure_infrastructure"),
        ("age_keys_generated", "generate_age_keys"),
        ("models_selected", "select_models"),
        ("first_agent_deployed", "deploy_first_agent"),
        ("cluster_initialized", "initialize_cluster"),
    )
    state = self.setup_state
    for flag_name, step_name in ordered_stages:
        if not getattr(state, flag_name):
            return step_name
    return "complete"
|
|
|
|
async def fetch_ollama_models(self) -> List[Dict[str, Any]]:
    """Return the catalogue of Ollama models offered during setup.

    Despite the name, no network request is made: the catalogue is defined
    inline below. Every call builds new dicts and lists, so callers may mutate
    the result freely.

    Returns:
        A list of dicts with the keys ``name``, ``description``, ``size``,
        ``category`` and ``capabilities``; an empty list if building it raised.
    """
    try:
        # Real models from Ollama registry based on your cluster data
        field_names = ("name", "description", "size", "category", "capabilities")
        catalogue = (
            # Popular General Purpose Models
            ("llama3.1:8b",
             "Llama 3.1 8B - State-of-the-art model from Meta available in 8B parameters",
             "4.7GB", "general", ["tools", "chat", "reasoning", "code"]),
            ("llama3.1:70b",
             "Llama 3.1 70B - Large high-performance model for demanding tasks",
             "40GB", "advanced", ["tools", "chat", "reasoning", "code", "complex"]),
            ("llama3.2:3b",
             "Meta's Llama 3.2 3B - Compact model that runs efficiently",
             "2.0GB", "general", ["tools", "chat", "lightweight"]),
            ("llama3.2:1b",
             "Meta's Llama 3.2 1B - Ultra lightweight for edge devices",
             "1.3GB", "lightweight", ["tools", "chat", "edge", "fast"]),

            # Coding Models
            ("qwen2.5-coder:7b",
             "Latest Code-Specific Qwen model with significant improvements in code generation",
             "4.1GB", "code", ["tools", "code", "reasoning", "programming"]),
            ("codellama:7b",
             "Code Llama 7B - Large language model for code generation and discussion",
             "3.8GB", "code", ["code", "programming", "debugging"]),
            ("deepseek-coder:6.7b",
             "DeepSeek Coder 6.7B - Trained on code and natural language tokens",
             "3.8GB", "code", ["code", "programming", "generation"]),

            # Reasoning Models
            ("deepseek-r1:7b",
             "DeepSeek-R1 7B - Open reasoning model with advanced thinking capabilities",
             "4.2GB", "reasoning", ["tools", "thinking", "reasoning", "analysis"]),
            ("qwen3:8b",
             "Qwen3 8B - Latest generation with dense and mixture-of-experts models",
             "4.6GB", "general", ["tools", "thinking", "reasoning", "multilingual"]),

            # Efficient Models
            ("mistral:7b",
             "Mistral 7B - Fast general purpose model updated to version 0.3",
             "4.1GB", "general", ["tools", "chat", "reasoning", "fast"]),
            ("gemma2:9b",
             "Google Gemma 2 9B - High-performing efficient model with multilingual support",
             "5.4GB", "general", ["chat", "reasoning", "math", "analysis"]),
            ("qwen2.5:7b",
             "Qwen2.5 7B - Multilingual model with 128K context length",
             "4.4GB", "general", ["tools", "chat", "multilingual", "reasoning"]),

            # Embedding Models
            ("nomic-embed-text",
             "High-performing open embedding model with large token context window",
             "274MB", "embedding", ["embedding", "search", "similarity"]),
            ("mxbai-embed-large",
             "State-of-the-art large embedding model from mixedbread.ai",
             "670MB", "embedding", ["embedding", "search", "retrieval"]),
        )
        models = [dict(zip(field_names, row)) for row in catalogue]

        logger.info(f"📋 Fetched {len(models)} available models from registry")
        return models

    except Exception as exc:
        logger.error(f"❌ Error fetching ollama models: {exc}")
        return []
|
|
|
|
async def configure_infrastructure(self, nodes: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Register the supplied nodes and test SSH connectivity to each of them.

    Args:
        nodes: One dict per machine. ``hostname``, ``ip_address`` and
            ``ssh_user`` are required; ``ssh_port`` (default 22),
            ``ssh_key_path``, ``ssh_password`` and ``role`` (default
            "worker") are optional.

    Returns:
        On success: ``success`` True, ``nodes_configured``,
        ``nodes_accessible`` and the per-node ``connectivity_results``.
        On any error, including an empty node list or a node missing a
        required field: ``{"success": False, "error": <message>}``. The setup
        state is left untouched in that case.
    """
    try:
        # An empty list would otherwise mark the infrastructure as configured
        # with nothing in it.
        if not nodes:
            raise ValueError("At least one node is required to configure infrastructure")

        logger.info(f"🏗️ Configuring infrastructure with {len(nodes)} nodes")

        # Convert dict nodes to ClusterNode objects, validating required keys
        # up front so the caller gets a readable message, not a bare KeyError.
        required_fields = ("hostname", "ip_address", "ssh_user")
        cluster_nodes = []
        for position, node_data in enumerate(nodes, start=1):
            missing = [key for key in required_fields if not node_data.get(key)]
            if missing:
                raise ValueError(
                    f"Node #{position} is missing required field(s): {', '.join(missing)}"
                )
            cluster_nodes.append(
                ClusterNode(
                    hostname=node_data["hostname"],
                    ip_address=node_data["ip_address"],
                    ssh_user=node_data["ssh_user"],
                    ssh_port=node_data.get("ssh_port", 22),
                    ssh_key_path=node_data.get("ssh_key_path"),
                    ssh_password=node_data.get("ssh_password"),
                    role=node_data.get("role", "worker"),
                )
            )

        # Test SSH connectivity to all nodes
        connectivity_results = await self._test_node_connectivity(cluster_nodes)

        # Update node statuses based on connectivity (results are in node order)
        for node, result in zip(cluster_nodes, connectivity_results):
            node.status = "ready" if result["success"] else "error"

        self.setup_state.nodes = cluster_nodes
        self.setup_state.infrastructure_configured = True

        successful_nodes = sum(1 for result in connectivity_results if result["success"])

        return {
            "success": True,
            "nodes_configured": len(nodes),
            "nodes_accessible": successful_nodes,
            "connectivity_results": connectivity_results,
        }

    except Exception as e:
        logger.error(f"❌ Error configuring infrastructure: {e}")
        return {"success": False, "error": str(e)}
|
|
|
|
async def _test_node_connectivity(self, nodes: List[ClusterNode]) -> List[Dict[str, Any]]:
    """Open an SSH connection to every node concurrently and report the result.

    Args:
        nodes: Nodes to probe. Key authentication is used when a node has an
            ``ssh_key_path``; password authentication otherwise.

    Returns:
        One dict per node, in input order, with ``hostname``, ``success`` and
        ``message`` (plus ``output`` when the probe command ran).
    """

    async def probe(node: ClusterNode) -> Dict[str, Any]:
        try:
            # Test SSH connection
            if node.ssh_key_path:
                # Use SSH key authentication
                auth = {"client_keys": [node.ssh_key_path]}
            else:
                # Use password authentication
                auth = {"password": node.ssh_password}

            async with asyncssh.connect(
                node.ip_address,
                port=node.ssh_port,
                username=node.ssh_user,
                known_hosts=None,  # Skip host key verification for now
                **auth,
            ) as conn:
                completed = await conn.run('echo "SSH test successful"')
                return {
                    "hostname": node.hostname,
                    "success": True,
                    "message": "SSH connection successful",
                    "output": completed.stdout.strip(),
                }
        except Exception as exc:
            return {
                "hostname": node.hostname,
                "success": False,
                "message": f"SSH connection failed: {str(exc)}",
            }

    # Test all nodes concurrently
    probes = [probe(node) for node in nodes]
    outcomes = await asyncio.gather(*probes, return_exceptions=True)

    # Handle any exceptions in the results
    formatted_results = []
    for node, outcome in zip(nodes, outcomes):
        if not isinstance(outcome, Exception):
            formatted_results.append(outcome)
            continue
        formatted_results.append({
            "hostname": node.hostname,
            "success": False,
            "message": f"Connection test failed: {str(outcome)}",
        })

    return formatted_results
|
|
|
|
async def generate_age_keys(self) -> Dict[str, Any]:
    """Generate an Age key pair with ``age-keygen`` and store it in the setup state.

    ``age-keygen`` prints the secret key on its own stdout line
    (``AGE-SECRET-KEY-...``) and the public key as a comment line
    (``# public key: age1...``). When stdout is not a terminal it also writes
    ``Public key: age1...`` to stderr. Both forms are recognised, as is a bare
    ``age1...`` line.

    Returns:
        ``{"success": True, "public_key": ..., "message": ...}`` when a
        complete key pair was parsed. The private key is kept only in
        ``self.setup_state.age_keys`` and is never returned.
        ``{"success": False, "error": ...}`` when ``age-keygen`` is missing,
        exits non-zero, or its output lacks either key.
    """
    try:
        logger.info("🔐 Generating Age encryption keys")

        # Generate age key pair using subprocess.
        # NOTE: this call blocks the event loop for the duration of age-keygen.
        result = subprocess.run(
            ["age-keygen"],
            capture_output=True,
            text=True
        )

        if result.returncode != 0:
            raise Exception(f"age-keygen failed: {result.stderr}")

        # Parse the key output
        private_key = ""
        public_key = ""
        public_key_prefixes = ("# public key:", "public key:")
        output_lines = (result.stdout or "").splitlines() + (result.stderr or "").splitlines()

        for raw_line in output_lines:
            line = raw_line.strip()
            if line.startswith("AGE-SECRET-KEY-"):
                private_key = line
            elif line.lower().startswith(public_key_prefixes):
                # "# public key: age1..." -> "age1..."
                public_key = line.split(":", 1)[1].strip()
            elif line.startswith("age1"):
                public_key = line

        # Storing empty keys and reporting success would break every later
        # step that relies on them.
        if not private_key or not public_key:
            raise Exception("age-keygen output did not contain a complete key pair")

        self.setup_state.age_keys = {
            "private_key": private_key,
            "public_key": public_key,
            "generated_at": datetime.utcnow().isoformat()
        }
        self.setup_state.age_keys_generated = True

        logger.info("✅ Age keys generated successfully")
        return {
            "success": True,
            "public_key": public_key,
            "message": "Age encryption keys generated successfully"
        }

    except FileNotFoundError:
        logger.error("❌ age-keygen command not found - please install age")
        return {
            "success": False,
            "error": "age-keygen command not found - please install age encryption tool"
        }
    except Exception as e:
        logger.error(f"❌ Error generating age keys: {e}")
        return {
            "success": False,
            "error": str(e)
        }
|
|
|
|
async def select_models(self, model_names: List[str]) -> Dict[str, Any]:
    """Record which models the cluster should run.

    Args:
        model_names: Model names to store on the setup state.

    Returns:
        ``success`` True with the stored names and a summary message, or
        ``{"success": False, "error": ...}`` if anything raised.
    """
    try:
        model_count = len(model_names)
        logger.info(f"📦 Selecting {model_count} models for cluster")

        state = self.setup_state
        state.selected_models = model_names
        state.models_selected = True

        outcome = {
            "success": True,
            "selected_models": model_names,
            "message": f"Selected {model_count} models for deployment",
        }
        return outcome

    except Exception as exc:
        logger.error(f"❌ Error selecting models: {exc}")
        return {"success": False, "error": str(exc)}
|
|
|
|
async def deploy_first_agent(self, coordinator_node_hostname: str) -> Dict[str, Any]:
    """Deploy the first BZZZ agent and pull the selected models onto it.

    Args:
        coordinator_node_hostname: ``hostname`` of a node already present in
            ``self.setup_state.nodes``.

    Returns:
        On success: ``success`` True, the coordinator hostname,
        ``models_pulled`` (the number of models whose pull was reported as
        successful), the deployment details and the per-model results.
        If the agent deployment fails, that failure dict is returned
        unchanged. Any other error yields
        ``{"success": False, "error": ...}``.
    """
    try:
        logger.info(f"🚀 Deploying first BZZZ agent to {coordinator_node_hostname}")

        # Find the coordinator node
        coordinator_node = next(
            (node for node in self.setup_state.nodes
             if node.hostname == coordinator_node_hostname),
            None,
        )
        if coordinator_node is None:
            raise Exception(f"Coordinator node {coordinator_node_hostname} not found")

        # Deploy BZZZ agent via SSH
        deployment_result = await self._deploy_bzzz_agent(coordinator_node, is_coordinator=True)
        if not deployment_result["success"]:
            return deployment_result

        # Pull selected models on the coordinator
        model_results = await self._pull_models_on_node(
            coordinator_node, self.setup_state.selected_models
        )

        # Only count models whose pull succeeded. A connection-level failure
        # produces a single result without a "model" key, which counts as none.
        pulled_models = [
            entry["model"]
            for entry in model_results
            if entry.get("success") and "model" in entry
        ]

        self.setup_state.first_agent_deployed = True
        coordinator_node.status = "ready"
        # A new list, so the node does not alias the shared selection list.
        coordinator_node.ollama_models = pulled_models

        return {
            "success": True,
            "coordinator": coordinator_node_hostname,
            "models_pulled": len(pulled_models),
            "deployment_details": deployment_result,
            "model_results": model_results,
        }

    except Exception as e:
        logger.error(f"❌ Error deploying first agent: {e}")
        return {"success": False, "error": str(e)}
|
|
|
|
async def _deploy_bzzz_agent(self, node: ClusterNode, is_coordinator: bool = False) -> Dict[str, Any]:
    """Deploy the BZZZ agent as a native systemd service on one node.

    Over a single SSH connection this installs build prerequisites, recreates
    ``~/chorus``, clones and builds BZZZ, writes ``config/bzzz.json`` and runs
    ``install-service.sh``. Every step after the prerequisite install must
    exit with status 0, otherwise the deployment is reported as failed.

    Args:
        node: Target machine. Key authentication is used when
            ``ssh_key_path`` is set, password authentication otherwise.
        is_coordinator: Written to the ``coordinator`` entry of the config.

    Returns:
        ``success`` True with ``message``, ``build_output`` and
        ``install_output``; or ``success`` False with an ``error`` string.
    """
    import shlex  # local import: quotes the config JSON for the remote shell

    repo_dir = "~/chorus/project-queues/active/BZZZ"
    try:
        # SSH to node and deploy BZZZ
        if node.ssh_key_path:
            conn_kwargs = {"client_keys": [node.ssh_key_path]}
        else:
            conn_kwargs = {"password": node.ssh_password}

        async with asyncssh.connect(
            node.ip_address,
            port=node.ssh_port,
            username=node.ssh_user,
            known_hosts=None,
            **conn_kwargs
        ) as conn:

            # Install Go and Git if not present. Left best-effort on purpose:
            # if the toolchain is really missing, the checked build step
            # below fails and reports it.
            await conn.run("sudo apt-get update && sudo apt-get install -y golang-go git build-essential")

            # Clone BZZZ repository
            await conn.run("rm -rf ~/chorus && mkdir -p ~/chorus/project-queues/active", check=True)
            clone_cmd = "cd ~/chorus/project-queues/active && git clone https://gitea.deepblack.cloud/tony/BZZZ.git"
            await conn.run(clone_cmd, check=True)

            # Build BZZZ binary
            build_cmd = f"cd {repo_dir} && go build -o bzzz"
            build_result = await conn.run(build_cmd, check=True)

            # Create BZZZ configuration (if needed - check if BZZZ uses config files)
            config = {
                "node": {"id": node.hostname},
                "agent": {"id": f"bzzz-{node.hostname}", "role": node.role},
                "api": {"host": "0.0.0.0", "port": 8080},
                "p2p": {"port": 4001},
                "coordinator": is_coordinator
            }

            # Write config file (adjust path as needed). The JSON is
            # shell-quoted so quotes or metacharacters in hostname/role can
            # neither break nor inject into the command.
            config_json = json.dumps(config, indent=2)
            write_cmd = (
                f"mkdir -p {repo_dir}/config && "
                f"printf '%s\\n' {shlex.quote(config_json)} > {repo_dir}/config/bzzz.json"
            )
            await conn.run(write_cmd, check=True)

            # Install BZZZ as systemd service
            install_cmd = f"cd {repo_dir} && sudo ./install-service.sh"
            install_result = await conn.run(install_cmd, check=True)

            return {
                "success": True,
                "message": f"BZZZ agent deployed as systemd service to {node.hostname}",
                "build_output": build_result.stdout,
                "install_output": install_result.stdout
            }

    except asyncssh.ProcessError as e:
        # A remote step exited non-zero; include its stderr so the caller can
        # see why.
        detail = (e.stderr or "").strip() or str(e)
        return {
            "success": False,
            "error": f"Failed to deploy BZZZ agent to {node.hostname}: {detail}"
        }
    except Exception as e:
        return {
            "success": False,
            "error": f"Failed to deploy BZZZ agent to {node.hostname}: {str(e)}"
        }
|
|
|
|
async def _pull_models_on_node(self, node: ClusterNode, models: List[str]) -> List[Dict[str, Any]]:
    """Install and start Ollama on a node, then pull each requested model.

    Args:
        node: Target machine. Key authentication is used when
            ``ssh_key_path`` is set, password authentication otherwise.
        models: Model names passed to ``ollama pull`` one at a time.

    Returns:
        One dict per model with ``model`` and ``success``, plus ``output`` on
        success or ``error`` on failure. A pull counts as successful only if
        the remote command exits with status 0. If the SSH session itself
        fails, a single ``{"error": ..., "success": False}`` entry is
        returned.
    """
    import shlex  # local import: quotes model names for the remote shell

    try:
        if node.ssh_key_path:
            conn_kwargs = {"client_keys": [node.ssh_key_path]}
        else:
            conn_kwargs = {"password": node.ssh_password}

        async with asyncssh.connect(
            node.ip_address,
            port=node.ssh_port,
            username=node.ssh_user,
            known_hosts=None,
            **conn_kwargs
        ) as conn:

            # Install Ollama if not present
            await conn.run("curl -fsSL https://ollama.com/install.sh | sh")

            # Start Ollama service
            await conn.run("sudo systemctl enable ollama && sudo systemctl start ollama")

            # Pull each model
            results = []
            for model in models:
                try:
                    result = await conn.run(f"ollama pull {shlex.quote(model)}")
                    # run() does not raise on a non-zero exit, so check the
                    # status explicitly instead of assuming the pull worked.
                    if result.exit_status != 0:
                        reason = (result.stderr or "").strip()
                        raise RuntimeError(
                            reason or f"ollama pull exited with status {result.exit_status}"
                        )
                    results.append({
                        "model": model,
                        "success": True,
                        "output": result.stdout
                    })
                    logger.info(f"✅ Pulled model {model} on {node.hostname}")
                except Exception as e:
                    results.append({
                        "model": model,
                        "success": False,
                        "error": str(e)
                    })
                    logger.error(f"❌ Failed to pull model {model} on {node.hostname}: {e}")

            return results

    except Exception as e:
        logger.error(f"❌ Error pulling models on {node.hostname}: {e}")
        return [{"error": str(e), "success": False}]
|
|
|
|
async def initialize_cluster(self) -> Dict[str, Any]:
    """Deploy agents to every node not yet "ready" and mark the cluster initialized.

    Deployments run one node at a time. The cluster is flagged as initialized
    and ``success`` is True regardless of how many deployments succeeded; the
    per-node outcomes are in ``deployment_results``.

    Returns:
        ``success``, ``cluster_nodes`` (total node count),
        ``successful_deployments``, ``deployment_results`` and ``message``;
        or ``{"success": False, "error": ...}`` if anything raised.
    """
    try:
        logger.info("🌐 Initializing complete cluster")

        state = self.setup_state

        # Deploy BZZZ agents to remaining nodes
        # NOTE(review): configure_infrastructure marks every SSH-reachable node
        # "ready", so this filter may select only the unreachable ones —
        # confirm that is intended.
        pending_nodes = [candidate for candidate in state.nodes if candidate.status != "ready"]

        deployment_results = []
        for pending in pending_nodes:
            outcome = await self._deploy_bzzz_agent(pending, is_coordinator=False)
            deployment_results.append(outcome)
            if outcome["success"]:
                pending.status = "ready"

        # TODO: Implement P2P model distribution via BZZZ network
        # For now, we'll note that models should be distributed via P2P

        state.cluster_initialized = True

        succeeded = [outcome for outcome in deployment_results if outcome["success"]]

        return {
            "success": True,
            "cluster_nodes": len(state.nodes),
            "successful_deployments": len(succeeded),
            "deployment_results": deployment_results,
            "message": "Cluster initialization completed",
        }

    except Exception as exc:
        logger.error(f"❌ Error initializing cluster: {exc}")
        return {"success": False, "error": str(exc)}
|
|
|
|
async def cleanup(self) -> None:
    """Close the HTTP session if one exists and log the outcome.

    Errors raised while closing are logged and not propagated.
    """
    try:
        open_session = self.session
        if open_session:
            await open_session.close()
        logger.info("🧹 Cluster Setup Service cleanup completed")
    except Exception as exc:
        logger.error(f"❌ Error during cleanup: {exc}")
|
|
|
|
# Global service instance, created when this module is imported. Construction
# only builds an empty setup state with no HTTP session; initialize() opens the
# session.
cluster_setup_service = ClusterSetupService()