WIP: Save current work before CHORUS rebrand
- Agent roles integration progress
- Various backend and frontend updates
- Storybook cache cleanup

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
@@ -26,7 +26,7 @@ class ClusterService:
                 "ip": "192.168.1.113",
                 "hostname": "ironwood",
                 "role": "worker",
-                "gpu": "NVIDIA RTX 3070",
+                "gpu": "NVIDIA RTX 2080S",
                 "memory": "128GB",
                 "cpu": "AMD Threadripper 2920X",
                 "ollama_port": 11434,
@@ -57,6 +57,66 @@ class ClusterService:
         self.n8n_api_base = "https://n8n.home.deepblack.cloud/api/v1"
         self.n8n_api_key = self._get_n8n_api_key()
 
+    def _get_live_hardware_info(self, hostname: str, ip: str) -> Dict[str, str]:
+        """Get live hardware information from a remote node via SSH."""
+        hardware = {
+            "cpu": "Unknown",
+            "memory": "Unknown",
+            "gpu": "Unknown"
+        }
+
+        try:
+            # Try to get GPU info via SSH
+            print(f"🔍 SSH GPU command for {hostname}: ssh tony@{ip} 'nvidia-smi || lspci | grep -i vga'")
+            gpu_result = subprocess.run([
+                "ssh", "-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=5",
+                f"tony@{ip}", "nvidia-smi --query-gpu=name --format=csv,noheader,nounits || lspci | grep -i 'vga\\|3d\\|display'"
+            ], capture_output=True, text=True, timeout=10)
+
+            print(f"📊 GPU command result for {hostname}: returncode={gpu_result.returncode}, stdout='{gpu_result.stdout.strip()}', stderr='{gpu_result.stderr.strip()}'")
+
+            if gpu_result.returncode == 0 and gpu_result.stdout.strip():
+                gpu_info = gpu_result.stdout.strip().split('\n')[0]
+                if "NVIDIA" in gpu_info or "RTX" in gpu_info or "GTX" in gpu_info:
+                    hardware["gpu"] = gpu_info.strip()
+                elif "VGA" in gpu_info or "Display" in gpu_info:
+                    # Parse lspci output for GPU info
+                    if "NVIDIA" in gpu_info:
+                        parts = gpu_info.split("NVIDIA")
+                        if len(parts) > 1:
+                            gpu_name = "NVIDIA" + parts[1].split('[')[0].strip()
+                            hardware["gpu"] = gpu_name
+                    elif "AMD" in gpu_info or "Radeon" in gpu_info:
+                        parts = gpu_info.split(":")
+                        if len(parts) > 2:
+                            gpu_name = parts[2].strip()
+                            hardware["gpu"] = gpu_name
+
+            # Try to get memory info via SSH
+            mem_result = subprocess.run([
+                "ssh", "-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=5",
+                f"tony@{ip}", "free -h | grep '^Mem:' | awk '{print $2}'"
+            ], capture_output=True, text=True, timeout=10)
+
+            if mem_result.returncode == 0 and mem_result.stdout.strip():
+                memory_info = mem_result.stdout.strip()
+                hardware["memory"] = memory_info
+
+            # Try to get CPU info via SSH
+            cpu_result = subprocess.run([
+                "ssh", "-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=5",
+                f"tony@{ip}", "lscpu | grep 'Model name:' | cut -d':' -f2- | xargs"
+            ], capture_output=True, text=True, timeout=10)
+
+            if cpu_result.returncode == 0 and cpu_result.stdout.strip():
+                cpu_info = cpu_result.stdout.strip()
+                hardware["cpu"] = cpu_info
+
+        except Exception as e:
+            print(f"Error getting live hardware info for {hostname}: {e}")
+
+        return hardware
+
     def _get_n8n_api_key(self) -> Optional[str]:
         """Get n8n API key from secrets."""
         try:
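The shell fallback used in the new GPU probe can be checked on its own before wiring it through SSH. Below is a minimal local sketch, assuming a Linux host with bash and either nvidia-smi or lspci available; the probe string is taken verbatim from the hunk above, everything else in the snippet is illustrative and not part of this commit.

```python
# Sketch: run the same GPU probe locally that _get_live_hardware_info sends over SSH,
# to see the raw text its parser receives. Assumes bash plus nvidia-smi or lspci.
import subprocess

probe = "nvidia-smi --query-gpu=name --format=csv,noheader,nounits || lspci | grep -i 'vga\\|3d\\|display'"
result = subprocess.run(["bash", "-c", probe], capture_output=True, text=True, timeout=10)
first_line = result.stdout.strip().split("\n")[0] if result.stdout.strip() else ""
print(result.returncode, repr(first_line))
```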
@@ -136,17 +196,35 @@ class ClusterService:
         except Exception:
             pass
 
+        # Try to get live hardware info if node is online
+        hardware_info = {
+            "cpu": node_info["cpu"],
+            "memory": node_info["memory"],
+            "gpu": node_info["gpu"]
+        }
+
+        if status == "online":
+            try:
+                print(f"🔍 Getting live hardware info for {node_id} ({node_info['ip']})")
+                live_hardware = self._get_live_hardware_info(node_info["hostname"], node_info["ip"])
+                print(f"📊 Live hardware detected for {node_id}: {live_hardware}")
+                # Use live data if available, fallback to hardcoded values
+                for key in ["cpu", "memory", "gpu"]:
+                    if live_hardware[key] != "Unknown":
+                        print(f"✅ Using live {key} for {node_id}: {live_hardware[key]}")
+                        hardware_info[key] = live_hardware[key]
+                    else:
+                        print(f"⚠️ Using fallback {key} for {node_id}: {hardware_info[key]}")
+            except Exception as e:
+                print(f"❌ Failed to get live hardware info for {node_id}: {e}")
+
         return {
             "id": node_id,
             "hostname": node_info["hostname"],
             "ip": node_info["ip"],
             "status": status,
             "role": node_info["role"],
-            "hardware": {
-                "cpu": node_info["cpu"],
-                "memory": node_info["memory"],
-                "gpu": node_info["gpu"]
-            },
+            "hardware": hardware_info,
             "model_count": model_count,
             "models": [{"name": m["name"], "size": m.get("size", 0)} for m in models],
             "metrics": {
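Taken together, the two hunks make the returned "hardware" block prefer live SSH readings and fall back to the hardcoded node entry on a per-key basis. The sketch below illustrates that merge using the ironwood values from this diff and a stubbed live result; the live values are made up for illustration and this is not code from the commit.

```python
# Stubbed illustration of the per-key fallback added above: live readings
# override the hardcoded entry only when they are not "Unknown".
hardcoded = {"cpu": "AMD Threadripper 2920X", "memory": "128GB", "gpu": "NVIDIA RTX 2080S"}
live = {"cpu": "Unknown", "memory": "125Gi", "gpu": "NVIDIA GeForce RTX 2080 SUPER"}  # hypothetical probe result

hardware_info = dict(hardcoded)
for key in ["cpu", "memory", "gpu"]:
    if live[key] != "Unknown":
        hardware_info[key] = live[key]

print(hardware_info)
# {'cpu': 'AMD Threadripper 2920X', 'memory': '125Gi', 'gpu': 'NVIDIA GeForce RTX 2080 SUPER'}
```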