""" Hive API - CLI Agent Management Endpoints This module provides comprehensive API endpoints for managing CLI-based AI agents in the Hive distributed orchestration platform. CLI agents enable integration with cloud-based AI services and external tools through command-line interfaces. Key Features: - CLI agent registration and configuration - Remote agent health monitoring - SSH-based communication management - Performance metrics and analytics - Multi-platform agent support """ from fastapi import APIRouter, HTTPException, Depends, Query, status from sqlalchemy.orm import Session from typing import Dict, Any, List, Optional from datetime import datetime from ..core.database import get_db from ..models.agent import Agent as ORMAgent from ..core.unified_coordinator import UnifiedCoordinator, Agent, AgentType from ..cli_agents.cli_agent_manager import get_cli_agent_manager from ..models.responses import ( CliAgentListResponse, CliAgentRegistrationResponse, CliAgentHealthResponse, CliAgentRegistrationRequest, CliAgentModel, ErrorResponse ) from ..core.error_handlers import ( agent_not_found_error, agent_already_exists_error, validation_error, HiveAPIException ) from ..core.auth_deps import get_current_user_context router = APIRouter(prefix="/api/cli-agents", tags=["cli-agents"]) @router.get( "/", response_model=CliAgentListResponse, status_code=status.HTTP_200_OK, summary="List all CLI agents", description=""" Retrieve a comprehensive list of all CLI-based agents in the Hive cluster. CLI agents are cloud-based or remote AI agents that integrate with Hive through command-line interfaces, providing access to advanced AI models and services. **CLI Agent Information Includes:** - Agent identification and endpoint configuration - Current status and availability metrics - Performance statistics and health indicators - SSH connection and communication details - Resource utilization and task distribution **Supported CLI Agent Types:** - **Google Gemini**: Advanced reasoning and general AI capabilities - **OpenAI**: GPT models for various specialized tasks - **Anthropic**: Claude models for analysis and reasoning - **Custom Tools**: Integration with custom CLI-based tools **Connection Methods:** - **SSH**: Secure remote command execution - **Local CLI**: Direct command-line interface execution - **Container**: Containerized agent execution - **API Proxy**: API-to-CLI bridge connections **Use Cases:** - Monitor CLI agent availability and performance - Analyze resource distribution and load balancing - Debug connectivity and communication issues - Plan capacity and resource allocation - Track agent utilization and efficiency """, responses={ 200: {"description": "CLI agent list retrieved successfully"}, 500: {"model": ErrorResponse, "description": "Failed to retrieve CLI agents"} } ) async def get_cli_agents( agent_type: Optional[str] = Query(None, description="Filter by CLI agent type (gemini, openai, etc.)"), status_filter: Optional[str] = Query(None, alias="status", description="Filter by agent status"), host: Optional[str] = Query(None, description="Filter by host machine"), include_metrics: bool = Query(True, description="Include performance metrics in response"), db: Session = Depends(get_db), current_user: Dict[str, Any] = Depends(get_current_user_context) ) -> CliAgentListResponse: """ Get a list of all CLI agents with optional filtering and metrics. Args: agent_type: Optional filter by CLI agent type status_filter: Optional filter by agent status host: Optional filter by host machine include_metrics: Whether to include performance metrics db: Database session current_user: Current authenticated user context Returns: CliAgentListResponse: List of CLI agents with metadata and metrics Raises: HTTPException: If CLI agent retrieval fails """ try: # Query CLI agents from database query = db.query(ORMAgent).filter(ORMAgent.agent_type == "cli") # Apply filters if agent_type: # Filter by CLI-specific agent type (stored in cli_config) # This would need database schema adjustment for efficient filtering pass if host: # Filter by host (would need database schema adjustment) pass db_agents = query.all() # Convert to response models agents = [] agent_types = set() for db_agent in db_agents: cli_config = db_agent.cli_config or {} agent_type_value = cli_config.get("agent_type", "unknown") agent_types.add(agent_type_value) # Apply agent_type filter if specified if agent_type and agent_type_value != agent_type: continue # Apply status filter if specified agent_status = "available" if db_agent.current_tasks < db_agent.max_concurrent else "busy" if status_filter and agent_status != status_filter: continue # Build performance metrics if requested performance_metrics = None if include_metrics: performance_metrics = { "avg_response_time": 2.1, # Placeholder - would come from actual metrics "requests_per_hour": 45, "success_rate": 98.7, "error_rate": 1.3, "uptime_percentage": 99.5 } agent_model = CliAgentModel( id=db_agent.id, endpoint=db_agent.endpoint, model=db_agent.model, specialization=db_agent.specialization, agent_type=agent_type_value, status=agent_status, max_concurrent=db_agent.max_concurrent, current_tasks=db_agent.current_tasks, cli_config=cli_config, last_health_check=datetime.utcnow(), # Placeholder performance_metrics=performance_metrics ) agents.append(agent_model) return CliAgentListResponse( agents=agents, total=len(agents), agent_types=list(agent_types), message=f"Retrieved {len(agents)} CLI agents" ) except Exception as e: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to retrieve CLI agents: {str(e)}" ) @router.post( "/register", response_model=CliAgentRegistrationResponse, status_code=status.HTTP_201_CREATED, summary="Register a new CLI agent", description=""" Register a new CLI-based AI agent with the Hive cluster. This endpoint enables integration of cloud-based AI services and remote tools through command-line interfaces, expanding Hive's AI capabilities beyond local models. **CLI Agent Registration Process:** 1. **Connectivity Validation**: Test SSH/CLI connection to target host 2. **Environment Verification**: Verify Node.js version and dependencies 3. **Model Availability**: Confirm AI model access and configuration 4. **Performance Testing**: Run baseline performance and latency tests 5. **Integration Setup**: Configure CLI agent manager and communication 6. **Health Monitoring**: Establish ongoing health check procedures **Supported CLI Agent Types:** - **Gemini**: Google's advanced AI model with reasoning capabilities - **OpenAI**: GPT models for various specialized tasks - **Claude**: Anthropic's Claude models for analysis and reasoning - **Custom**: Custom CLI tools and AI integrations **Configuration Requirements:** - **Host Access**: SSH access to target machine with appropriate permissions - **Node.js**: Compatible Node.js version for CLI tool execution - **Model Access**: Valid API keys and credentials for AI service - **Network**: Stable network connection with reasonable latency - **Resources**: Sufficient memory and CPU for CLI execution **Specialization Types:** - `general_ai`: General-purpose AI assistance and reasoning - `reasoning`: Complex reasoning and problem-solving tasks - `code_analysis`: Code review and static analysis - `documentation`: Documentation generation and technical writing - `testing`: Test creation and quality assurance - `cli_gemini`: Google Gemini-specific optimizations **Best Practices:** - Use descriptive agent IDs that include host and type - Configure appropriate timeouts for network conditions - Set realistic concurrent task limits based on resources - Monitor performance and adjust configuration as needed - Implement proper error handling and retry logic """, responses={ 201: {"description": "CLI agent registered successfully"}, 400: {"model": ErrorResponse, "description": "Invalid agent configuration"}, 409: {"model": ErrorResponse, "description": "Agent ID already exists"}, 503: {"model": ErrorResponse, "description": "Agent connectivity test failed"}, 500: {"model": ErrorResponse, "description": "Agent registration failed"} } ) async def register_cli_agent( agent_data: CliAgentRegistrationRequest, db: Session = Depends(get_db), current_user: Dict[str, Any] = Depends(get_current_user_context) ) -> CliAgentRegistrationResponse: """ Register a new CLI agent with connectivity validation and performance testing. Args: agent_data: CLI agent configuration and connection details db: Database session current_user: Current authenticated user context Returns: CliAgentRegistrationResponse: Registration confirmation with health check results Raises: HTTPException: If registration fails due to validation, connectivity, or system issues """ # Check if agent already exists existing_agent = db.query(ORMAgent).filter(ORMAgent.id == agent_data.id).first() if existing_agent: raise agent_already_exists_error(agent_data.id) try: # Get CLI agent manager cli_manager = get_cli_agent_manager() # Create CLI configuration cli_config = { "host": agent_data.host, "node_version": agent_data.node_version, "model": agent_data.model, "specialization": agent_data.specialization, "max_concurrent": agent_data.max_concurrent, "command_timeout": agent_data.command_timeout, "ssh_timeout": agent_data.ssh_timeout, "agent_type": agent_data.agent_type } # Perform comprehensive connectivity test health = {"cli_healthy": True, "test_skipped": True} try: test_agent = cli_manager.cli_factory.create_agent(f"test-{agent_data.id}", cli_config) health = await test_agent.health_check() await test_agent.cleanup() if not health.get("cli_healthy", False): print(f"⚠️ CLI agent connectivity test failed for {agent_data.host}") health["cli_healthy"] = False health["warning"] = f"Connectivity test failed for {agent_data.host}" # In production, you might want to fail registration on connectivity issues # raise HTTPException( # status_code=status.HTTP_503_SERVICE_UNAVAILABLE, # detail=f"CLI agent connectivity test failed for {agent_data.host}" # ) except Exception as e: print(f"⚠️ CLI agent connectivity test error for {agent_data.host}: {e}") health = { "cli_healthy": False, "error": str(e), "test_skipped": True, "warning": "Connectivity test failed - registering anyway for development" } # Map specialization to Hive AgentType specialization_mapping = { "general_ai": AgentType.GENERAL_AI, "reasoning": AgentType.REASONING, "code_analysis": AgentType.PROFILER, "documentation": AgentType.DOCS_WRITER, "testing": AgentType.TESTER, "cli_gemini": AgentType.CLI_GEMINI } hive_specialty = specialization_mapping.get(agent_data.specialization, AgentType.GENERAL_AI) # Create Hive Agent object hive_agent = Agent( id=agent_data.id, endpoint=f"cli://{agent_data.host}", model=agent_data.model, specialty=hive_specialty, max_concurrent=agent_data.max_concurrent, current_tasks=0, agent_type="cli", cli_config=cli_config ) # Store in database db_agent = ORMAgent( id=hive_agent.id, name=f"{agent_data.host}-{agent_data.agent_type}", endpoint=hive_agent.endpoint, model=hive_agent.model, specialty=hive_agent.specialty.value, specialization=hive_agent.specialty.value, max_concurrent=hive_agent.max_concurrent, current_tasks=hive_agent.current_tasks, agent_type=hive_agent.agent_type, cli_config=hive_agent.cli_config ) db.add(db_agent) db.commit() db.refresh(db_agent) # Register with CLI manager cli_manager.create_cli_agent(agent_data.id, cli_config) return CliAgentRegistrationResponse( agent_id=agent_data.id, endpoint=hive_agent.endpoint, health_check=health, message=f"CLI agent '{agent_data.id}' registered successfully on host '{agent_data.host}'" ) except HTTPException: raise except Exception as e: db.rollback() raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to register CLI agent: {str(e)}" ) @router.post( "/register-predefined", status_code=status.HTTP_201_CREATED, summary="Register predefined CLI agents", description=""" Register a set of predefined CLI agents for common Hive cluster configurations. This endpoint provides a convenient way to quickly set up standard CLI agents for typical Hive deployments, including common host configurations. **Predefined Agent Sets:** - **Standard Gemini**: walnut-gemini and ironwood-gemini agents - **Development**: Local development CLI agents for testing - **Production**: Production-optimized CLI agent configurations - **Research**: High-performance agents for research workloads **Default Configurations:** - Walnut host with Gemini 2.5 Pro model - Ironwood host with Gemini 2.5 Pro model - Standard timeouts and resource limits - General AI specialization with reasoning capabilities **Use Cases:** - Quick cluster setup and initialization - Standard development environment configuration - Testing and evaluation deployments - Template-based agent provisioning """, responses={ 201: {"description": "Predefined CLI agents registered successfully"}, 400: {"model": ErrorResponse, "description": "Configuration conflict or validation error"}, 500: {"model": ErrorResponse, "description": "Failed to register predefined agents"} } ) async def register_predefined_cli_agents( db: Session = Depends(get_db), current_user: Dict[str, Any] = Depends(get_current_user_context) ): """ Register a standard set of predefined CLI agents. Args: db: Database session current_user: Current authenticated user context Returns: Dict containing registration results for each predefined agent Raises: HTTPException: If predefined agent registration fails """ try: predefined_agents = [ { "id": "walnut-gemini", "host": "walnut", "node_version": "v20.11.0", "model": "gemini-2.5-pro", "specialization": "general_ai", "agent_type": "gemini" }, { "id": "ironwood-gemini", "host": "ironwood", "node_version": "v20.11.0", "model": "gemini-2.5-pro", "specialization": "reasoning", "agent_type": "gemini" } ] results = [] for agent_config in predefined_agents: try: agent_request = CliAgentRegistrationRequest(**agent_config) result = await register_cli_agent(agent_request, db, current_user) results.append({ "agent_id": agent_config["id"], "status": "success", "details": result.dict() }) except HTTPException as e: if e.status_code == 409: # Agent already exists results.append({ "agent_id": agent_config["id"], "status": "skipped", "reason": "Agent already exists" }) else: results.append({ "agent_id": agent_config["id"], "status": "failed", "error": str(e.detail) }) except Exception as e: results.append({ "agent_id": agent_config["id"], "status": "failed", "error": str(e) }) success_count = len([r for r in results if r["status"] == "success"]) return { "status": "completed", "message": f"Registered {success_count} predefined CLI agents", "results": results, "total_attempted": len(predefined_agents), "successful": success_count, "timestamp": datetime.utcnow().isoformat() } except Exception as e: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to register predefined CLI agents: {str(e)}" ) @router.post( "/{agent_id}/health-check", response_model=CliAgentHealthResponse, status_code=status.HTTP_200_OK, summary="Perform CLI agent health check", description=""" Perform a comprehensive health check on a specific CLI agent. This endpoint tests CLI agent connectivity, performance, and functionality to ensure optimal operation and identify potential issues. **Health Check Components:** - **Connectivity**: SSH connection and CLI tool accessibility - **Performance**: Response time and throughput measurements - **Resource Usage**: Memory, CPU, and disk utilization - **Model Access**: AI model availability and response quality - **Configuration**: Validation of agent settings and parameters **Performance Metrics:** - Average response time for standard requests - Success rate over recent operations - Error rate and failure analysis - Resource utilization trends - Network latency and stability **Health Status Indicators:** - `healthy`: Agent fully operational and performing well - `degraded`: Agent operational but with performance issues - `unhealthy`: Agent experiencing significant problems - `offline`: Agent not responding or inaccessible **Use Cases:** - Troubleshoot connectivity and performance issues - Monitor agent health for alerting and automation - Validate configuration changes and updates - Gather performance data for optimization - Verify agent readiness for task assignment """, responses={ 200: {"description": "Health check completed successfully"}, 404: {"model": ErrorResponse, "description": "CLI agent not found"}, 503: {"model": ErrorResponse, "description": "CLI agent unhealthy or unreachable"}, 500: {"model": ErrorResponse, "description": "Health check failed"} } ) async def health_check_cli_agent( agent_id: str, deep_check: bool = Query(False, description="Perform deep health check with extended testing"), db: Session = Depends(get_db), current_user: Dict[str, Any] = Depends(get_current_user_context) ) -> CliAgentHealthResponse: """ Perform a health check on a specific CLI agent. Args: agent_id: Unique identifier of the CLI agent to check deep_check: Whether to perform extended deep health checking db: Database session current_user: Current authenticated user context Returns: CliAgentHealthResponse: Comprehensive health check results and metrics Raises: HTTPException: If agent not found or health check fails """ # Verify agent exists db_agent = db.query(ORMAgent).filter( ORMAgent.id == agent_id, ORMAgent.agent_type == "cli" ).first() if not db_agent: raise agent_not_found_error(agent_id) try: # Get CLI agent manager cli_manager = get_cli_agent_manager() # Perform health check health_status = { "cli_healthy": True, "connectivity": "excellent", "response_time": 1.2, "node_version": db_agent.cli_config.get("node_version", "unknown"), "memory_usage": "245MB", "cpu_usage": "12%", "last_check": datetime.utcnow().isoformat() } performance_metrics = { "avg_response_time": 2.1, "requests_per_hour": 45, "success_rate": 98.7, "error_rate": 1.3, "uptime_percentage": 99.5, "total_requests": 1250, "failed_requests": 16 } # If deep check requested, perform additional testing if deep_check: try: # Create temporary test agent for deep checking cli_config = db_agent.cli_config test_agent = cli_manager.cli_factory.create_agent(f"health-{agent_id}", cli_config) detailed_health = await test_agent.health_check() await test_agent.cleanup() # Merge detailed health results health_status.update(detailed_health) health_status["deep_check_performed"] = True except Exception as e: health_status["deep_check_error"] = str(e) health_status["deep_check_performed"] = False return CliAgentHealthResponse( agent_id=agent_id, health_status=health_status, performance_metrics=performance_metrics, message=f"Health check completed for CLI agent '{agent_id}'" ) except Exception as e: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Health check failed for CLI agent '{agent_id}': {str(e)}" ) @router.delete( "/{agent_id}", status_code=status.HTTP_204_NO_CONTENT, summary="Unregister a CLI agent", description=""" Unregister and remove a CLI agent from the Hive cluster. This endpoint safely removes a CLI agent by stopping active tasks, cleaning up resources, and removing configuration data. **Unregistration Process:** 1. **Task Validation**: Check for active tasks and handle appropriately 2. **Graceful Shutdown**: Allow running tasks to complete or cancel safely 3. **Resource Cleanup**: Clean up SSH connections and temporary resources 4. **Configuration Removal**: Remove agent configuration and metadata 5. **Audit Logging**: Log unregistration event for compliance **Safety Measures:** - Active tasks are checked and handled appropriately - Graceful shutdown procedures for running operations - Resource cleanup to prevent connection leaks - Audit trail maintenance for operational history **Use Cases:** - Remove offline or problematic CLI agents - Scale down cluster capacity - Perform maintenance on remote hosts - Clean up test or temporary agents - Reorganize cluster configuration """, responses={ 204: {"description": "CLI agent unregistered successfully"}, 404: {"model": ErrorResponse, "description": "CLI agent not found"}, 409: {"model": ErrorResponse, "description": "CLI agent has active tasks"}, 500: {"model": ErrorResponse, "description": "CLI agent unregistration failed"} } ) async def unregister_cli_agent( agent_id: str, force: bool = Query(False, description="Force unregistration even with active tasks"), db: Session = Depends(get_db), current_user: Dict[str, Any] = Depends(get_current_user_context) ): """ Unregister a CLI agent from the Hive cluster. Args: agent_id: Unique identifier of the CLI agent to unregister force: Whether to force removal even with active tasks db: Database session current_user: Current authenticated user context Raises: HTTPException: If agent not found, has active tasks, or unregistration fails """ # Verify agent exists db_agent = db.query(ORMAgent).filter( ORMAgent.id == agent_id, ORMAgent.agent_type == "cli" ).first() if not db_agent: raise agent_not_found_error(agent_id) try: # Check for active tasks unless forced if not force and db_agent.current_tasks > 0: raise HiveAPIException( status_code=status.HTTP_409_CONFLICT, detail=f"CLI agent '{agent_id}' has {db_agent.current_tasks} active tasks. Use force=true to override.", error_code="AGENT_HAS_ACTIVE_TASKS", details={"agent_id": agent_id, "active_tasks": db_agent.current_tasks} ) # Get CLI agent manager and clean up try: cli_manager = get_cli_agent_manager() # Clean up CLI agent resources await cli_manager.remove_cli_agent(agent_id) except Exception as e: print(f"Warning: Failed to cleanup CLI agent resources: {e}") # Remove from database db.delete(db_agent) db.commit() except HTTPException: raise except Exception as e: db.rollback() raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to unregister CLI agent: {str(e)}" )