""" WHOOSH AI Models API - Phase 6.1 REST API endpoints for AI model management and usage """ from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks from typing import List, Dict, Any, Optional from pydantic import BaseModel import logging from app.services.ai_model_service import ai_model_service, ModelCapability, AIModel from app.core.auth_deps import get_current_user from app.models.user import User logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/ai-models", tags=["AI Models"]) # Request/Response Models class CompletionRequest(BaseModel): prompt: str model_name: Optional[str] = None system_prompt: Optional[str] = None max_tokens: int = 1000 temperature: float = 0.7 task_type: Optional[str] = None context_requirements: int = 2048 class CompletionResponse(BaseModel): success: bool content: Optional[str] = None model: str response_time: Optional[float] = None usage_stats: Optional[Dict[str, Any]] = None error: Optional[str] = None class ModelInfo(BaseModel): name: str node_url: str capabilities: List[str] context_length: int parameter_count: str specialization: Optional[str] = None performance_score: float availability: bool usage_count: int avg_response_time: float class ClusterStatus(BaseModel): total_nodes: int healthy_nodes: int total_models: int models_by_capability: Dict[str, int] cluster_load: float model_usage_stats: Dict[str, Dict[str, Any]] class ModelSelectionRequest(BaseModel): task_type: str context_requirements: int = 2048 prefer_specialized: bool = True class CodeGenerationRequest(BaseModel): description: str language: str = "python" context: Optional[str] = None style: str = "clean" # clean, optimized, documented max_tokens: int = 2000 class CodeReviewRequest(BaseModel): code: str language: str focus_areas: List[str] = ["bugs", "performance", "security", "style"] severity_level: str = "medium" # low, medium, high @router.on_event("startup") async def startup_ai_service(): """Initialize AI model service on startup""" try: await ai_model_service.initialize() logger.info("AI Model Service initialized successfully") except Exception as e: logger.error(f"Failed to initialize AI Model Service: {e}") @router.on_event("shutdown") async def shutdown_ai_service(): """Cleanup AI model service on shutdown""" await ai_model_service.cleanup() @router.get("/status", response_model=ClusterStatus) async def get_cluster_status(current_user: User = Depends(get_current_user)): """Get comprehensive cluster status""" try: status = await ai_model_service.get_cluster_status() return ClusterStatus(**status) except Exception as e: logger.error(f"Error getting cluster status: {e}") raise HTTPException(status_code=500, detail="Failed to get cluster status") @router.get("/models", response_model=List[ModelInfo]) async def list_available_models(current_user: User = Depends(get_current_user)): """List all available AI models across the cluster""" try: models = [] for model in ai_model_service.models.values(): models.append(ModelInfo( name=model.name, node_url=model.node_url, capabilities=[cap.value for cap in model.capabilities], context_length=model.context_length, parameter_count=model.parameter_count, specialization=model.specialization, performance_score=model.performance_score, availability=model.availability, usage_count=model.usage_count, avg_response_time=model.avg_response_time )) return sorted(models, key=lambda x: x.name) except Exception as e: logger.error(f"Error listing models: {e}") raise HTTPException(status_code=500, detail="Failed to list models") @router.post("/select-model", response_model=ModelInfo) async def select_best_model( request: ModelSelectionRequest, current_user: User = Depends(get_current_user) ): """Select the best model for a specific task""" try: # Convert task_type string to enum try: task_capability = ModelCapability(request.task_type) except ValueError: raise HTTPException( status_code=400, detail=f"Invalid task type: {request.task_type}" ) model = await ai_model_service.get_best_model_for_task( task_type=task_capability, context_requirements=request.context_requirements, prefer_specialized=request.prefer_specialized ) if not model: raise HTTPException( status_code=404, detail="No suitable model found for the specified task" ) return ModelInfo( name=model.name, node_url=model.node_url, capabilities=[cap.value for cap in model.capabilities], context_length=model.context_length, parameter_count=model.parameter_count, specialization=model.specialization, performance_score=model.performance_score, availability=model.availability, usage_count=model.usage_count, avg_response_time=model.avg_response_time ) except HTTPException: raise except Exception as e: logger.error(f"Error selecting model: {e}") raise HTTPException(status_code=500, detail="Failed to select model") @router.post("/generate", response_model=CompletionResponse) async def generate_completion( request: CompletionRequest, current_user: User = Depends(get_current_user) ): """Generate completion using AI model""" try: model_name = request.model_name # Auto-select model if not specified if not model_name and request.task_type: try: task_capability = ModelCapability(request.task_type) best_model = await ai_model_service.get_best_model_for_task( task_type=task_capability, context_requirements=request.context_requirements ) if best_model: model_name = best_model.name except ValueError: pass if not model_name: # Default to first available model available_models = [m for m in ai_model_service.models.values() if m.availability] if not available_models: raise HTTPException(status_code=503, detail="No models available") model_name = available_models[0].name result = await ai_model_service.generate_completion( model_name=model_name, prompt=request.prompt, system_prompt=request.system_prompt, max_tokens=request.max_tokens, temperature=request.temperature ) return CompletionResponse(**result) except Exception as e: logger.error(f"Error generating completion: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.post("/code/generate", response_model=CompletionResponse) async def generate_code( request: CodeGenerationRequest, current_user: User = Depends(get_current_user) ): """Generate code using AI models optimized for coding""" try: # Select best coding model coding_model = await ai_model_service.get_best_model_for_task( task_type=ModelCapability.CODE_GENERATION, context_requirements=max(2048, len(request.description) * 4) ) if not coding_model: raise HTTPException(status_code=503, detail="No coding models available") # Craft specialized prompt for code generation system_prompt = f"""You are an expert {request.language} programmer. Generate clean, well-documented, and efficient code. Style preferences: {request.style} Language: {request.language} Focus on: best practices, readability, and maintainability.""" prompt = f"""Generate {request.language} code for the following requirement: Description: {request.description} {f"Context: {request.context}" if request.context else ""} Please provide: 1. Clean, well-structured code 2. Appropriate comments and documentation 3. Error handling where relevant 4. Following {request.language} best practices Code:""" result = await ai_model_service.generate_completion( model_name=coding_model.name, prompt=prompt, system_prompt=system_prompt, max_tokens=request.max_tokens, temperature=0.3 # Lower temperature for more deterministic code ) return CompletionResponse(**result) except Exception as e: logger.error(f"Error generating code: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.post("/code/review", response_model=CompletionResponse) async def review_code( request: CodeReviewRequest, current_user: User = Depends(get_current_user) ): """Review code using AI models optimized for code analysis""" try: # Select best code review model review_model = await ai_model_service.get_best_model_for_task( task_type=ModelCapability.CODE_REVIEW, context_requirements=max(4096, len(request.code) * 2) ) if not review_model: raise HTTPException(status_code=503, detail="No code review models available") # Craft specialized prompt for code review system_prompt = f"""You are an expert code reviewer specializing in {request.language}. Provide constructive, actionable feedback focusing on: {', '.join(request.focus_areas)}. Severity level: {request.severity_level} Be specific about line numbers and provide concrete suggestions for improvement.""" focus_description = { "bugs": "potential bugs and logic errors", "performance": "performance optimizations and efficiency", "security": "security vulnerabilities and best practices", "style": "code style, formatting, and conventions", "maintainability": "code maintainability and readability", "testing": "test coverage and testability" } focus_details = [focus_description.get(area, area) for area in request.focus_areas] prompt = f"""Please review this {request.language} code focusing on: {', '.join(focus_details)} Code to review: ```{request.language} {request.code} ``` Provide a detailed review including: 1. Overall assessment 2. Specific issues found (with line references if applicable) 3. Recommendations for improvement 4. Best practices that could be applied 5. Security considerations (if applicable) Review:""" result = await ai_model_service.generate_completion( model_name=review_model.name, prompt=prompt, system_prompt=system_prompt, max_tokens=2000, temperature=0.5 ) return CompletionResponse(**result) except Exception as e: logger.error(f"Error reviewing code: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.post("/refresh-models") async def refresh_model_discovery( background_tasks: BackgroundTasks, current_user: User = Depends(get_current_user) ): """Refresh model discovery across the cluster""" try: background_tasks.add_task(ai_model_service.discover_cluster_models) return {"message": "Model discovery refresh initiated"} except Exception as e: logger.error(f"Error refreshing models: {e}") raise HTTPException(status_code=500, detail="Failed to refresh models") @router.get("/capabilities") async def list_model_capabilities(): """List all available model capabilities""" return { "capabilities": [ { "name": cap.value, "description": cap.value.replace("_", " ").title() } for cap in ModelCapability ] }