Files
hive/backend/app/services/project_service.py
anthonyrawlins b6bff318d9 WIP: Save current work before CHORUS rebrand
- Agent roles integration progress
- Various backend and frontend updates
- Storybook cache cleanup

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-01 02:20:56 +10:00

746 lines
31 KiB
Python

"""
Project Service for integrating with local project directories and GitHub.
"""
import os
import json
import re
from pathlib import Path
from typing import List, Dict, Optional, Any
from datetime import datetime
import requests
from requests.auth import HTTPBasicAuth
import markdown
from app.models.project import Project
class ProjectService:
def __init__(self):
self.projects_base_path = Path("/home/tony/AI/projects")
self.github_token = self._get_github_token()
self.github_api_base = "https://api.github.com"
def _get_github_token(self) -> Optional[str]:
"""Get GitHub token from Docker secret or secrets file."""
try:
# Try Docker secret first (more secure)
docker_secret_path = Path("/run/secrets/github_token")
if docker_secret_path.exists():
return docker_secret_path.read_text().strip()
# Try gh-token from filesystem (fallback)
gh_token_path = Path("/home/tony/AI/secrets/passwords_and_tokens/gh-token")
if gh_token_path.exists():
return gh_token_path.read_text().strip()
# Try GitHub token from filesystem
github_token_path = Path("/home/tony/AI/secrets/passwords_and_tokens/github-token")
if github_token_path.exists():
return github_token_path.read_text().strip()
# Fallback to GitLab token if GitHub token doesn't exist
gitlab_token_path = Path("/home/tony/AI/secrets/passwords_and_tokens/claude-gitlab-token")
if gitlab_token_path.exists():
return gitlab_token_path.read_text().strip()
except Exception as e:
print(f"Error reading GitHub token: {e}")
return None
def get_all_projects(self) -> List[Dict[str, Any]]:
"""Get all projects from the local filesystem."""
projects = []
if not self.projects_base_path.exists():
return projects
for project_dir in self.projects_base_path.iterdir():
if project_dir.is_dir() and not project_dir.name.startswith('.'):
project_data = self._analyze_project_directory(project_dir)
if project_data:
projects.append(project_data)
# Sort by last modified date
projects.sort(key=lambda x: x.get('updated_at', ''), reverse=True)
return projects
def get_project_by_id(self, project_id: str) -> Optional[Dict[str, Any]]:
"""Get a specific project by ID (directory name)."""
project_path = self.projects_base_path / project_id
if not project_path.exists() or not project_path.is_dir():
return None
return self._analyze_project_directory(project_path)
def _analyze_project_directory(self, project_path: Path) -> Optional[Dict[str, Any]]:
    """Analyze a project directory and extract metadata.

    Assembles the canonical project dict used across the service: id
    (directory name), display name, description parsed from PROJECT_PLAN.md,
    status/tag heuristics, GitHub remote (when .git/config exists), and
    simple workflow/file-count/size metrics. Returns None for the 'hive'
    project itself or when anything raises (the error is printed, not
    propagated).
    """
    try:
        project_id = project_path.name
        # Skip if this is the hive project itself
        if project_id == 'hive':
            return None
        # Get basic file info; directory ctime/mtime stand in for the
        # project's created/updated timestamps.
        stat = project_path.stat()
        created_at = datetime.fromtimestamp(stat.st_ctime).isoformat()
        updated_at = datetime.fromtimestamp(stat.st_mtime).isoformat()
        # Read PROJECT_PLAN.md if it exists
        project_plan_path = project_path / "PROJECT_PLAN.md"
        project_plan_content = ""
        description = ""
        if project_plan_path.exists():
            project_plan_content = project_plan_path.read_text(encoding='utf-8')
            description = self._extract_description_from_plan(project_plan_content)
        # Read TODOS.md if it exists
        todos_path = project_path / "TODOS.md"
        todos_content = ""
        if todos_path.exists():
            todos_content = todos_path.read_text(encoding='utf-8')
        # Check for GitHub repository
        git_config_path = project_path / ".git" / "config"
        github_repo = None
        if git_config_path.exists():
            github_repo = self._extract_github_repo(git_config_path)
        # Determine project status (recent activity / TODO keywords / deploy files)
        status = self._determine_project_status(project_path, todos_content)
        # Extract tags from plan content and files on disk
        tags = self._extract_tags(project_plan_content, project_path)
        # Get workflow count (look for workflow-related files)
        workflow_count = self._count_workflows(project_path)
        # Build project data
        project_data = {
            "id": project_id,
            "name": self._format_project_name(project_id),
            "description": description or f"Project in {project_id}",
            "status": status,
            "created_at": created_at,
            "updated_at": updated_at,
            "tags": tags,
            "github_repo": github_repo,
            "workflow_count": workflow_count,
            "has_project_plan": project_plan_path.exists(),
            "has_todos": todos_path.exists(),
            # NOTE: rglob("*") counts every entry under the tree,
            # directories included, not just files.
            "file_count": len(list(project_path.rglob("*"))),
            "metadata": {
                "project_plan_path": str(project_plan_path) if project_plan_path.exists() else None,
                "todos_path": str(todos_path) if todos_path.exists() else None,
                "directory_size": self._get_directory_size(project_path)
            }
        }
        return project_data
    except Exception as e:
        print(f"Error analyzing project directory {project_path}: {e}")
        return None
def _extract_description_from_plan(self, content: str) -> str:
"""Extract description from PROJECT_PLAN.md content."""
lines = content.split('\n')
description_lines = []
in_description = False
for line in lines:
line = line.strip()
if not line:
continue
# Look for overview, description, or objective sections
if re.match(r'^#+\s*(overview|description|objective|project\s+description)', line, re.IGNORECASE):
in_description = True
continue
elif line.startswith('#') and in_description:
break
elif in_description and not line.startswith('#'):
description_lines.append(line)
if len(description_lines) >= 2: # Limit to first 2 lines
break
description = ' '.join(description_lines).strip()
# If no description found, try to get from the beginning
if not description:
for line in lines:
line = line.strip()
if line and not line.startswith('#') and not line.startswith('```'):
description = line
break
return description[:200] + "..." if len(description) > 200 else description
def _extract_github_repo(self, git_config_path: Path) -> Optional[str]:
"""Extract GitHub repository URL from git config."""
try:
config_content = git_config_path.read_text()
# Look for GitHub remote URL
for line in config_content.split('\n'):
if 'github.com' in line and ('url =' in line or 'url=' in line):
url = line.split('=', 1)[1].strip()
# Extract repo name from URL
if 'github.com/' in url:
repo_part = url.split('github.com/')[-1]
if repo_part.endswith('.git'):
repo_part = repo_part[:-4]
return repo_part
except Exception:
pass
return None
def _determine_project_status(self, project_path: Path, todos_content: str) -> str:
"""Determine project status based on various indicators."""
# Check for recent activity (files modified in last 30 days)
recent_activity = False
thirty_days_ago = datetime.now().timestamp() - (30 * 24 * 60 * 60)
try:
for file_path in project_path.rglob("*"):
if file_path.is_file() and file_path.stat().st_mtime > thirty_days_ago:
recent_activity = True
break
except Exception:
pass
# Check TODOS for status indicators
if todos_content:
content_lower = todos_content.lower()
if any(keyword in content_lower for keyword in ['completed', 'done', 'finished']):
if not recent_activity:
return "archived"
if any(keyword in content_lower for keyword in ['in progress', 'active', 'working']):
return "active"
# Check for deployment files
deployment_files = ['Dockerfile', 'docker-compose.yml', 'deploy.sh', 'package.json']
has_deployment = any((project_path / f).exists() for f in deployment_files)
if recent_activity:
return "active"
elif has_deployment:
return "inactive"
else:
return "draft"
def _extract_tags(self, content: str, project_path: Path) -> List[str]:
"""Extract tags based on content and file analysis."""
tags = []
if content:
content_lower = content.lower()
# Technology tags
tech_tags = {
'python': ['python', '.py'],
'javascript': ['javascript', 'js', 'node'],
'typescript': ['typescript', 'ts'],
'react': ['react', 'jsx'],
'docker': ['docker', 'dockerfile'],
'ai': ['ai', 'ml', 'machine learning', 'neural', 'model'],
'web': ['web', 'frontend', 'backend', 'api'],
'automation': ['automation', 'workflow', 'n8n'],
'infrastructure': ['infrastructure', 'deployment', 'devops'],
'mobile': ['mobile', 'ios', 'android', 'swift'],
'data': ['data', 'database', 'sql', 'analytics'],
'security': ['security', 'auth', 'authentication']
}
for tag, keywords in tech_tags.items():
if any(keyword in content_lower for keyword in keywords):
tags.append(tag)
# File-based tags
files = list(project_path.rglob("*"))
file_extensions = [f.suffix.lower() for f in files if f.is_file()]
if '.py' in file_extensions:
tags.append('python')
if '.js' in file_extensions or '.ts' in file_extensions:
tags.append('javascript')
if any(f.name == 'Dockerfile' for f in files):
tags.append('docker')
if any(f.name == 'package.json' for f in files):
tags.append('node')
return list(set(tags)) # Remove duplicates
def _count_workflows(self, project_path: Path) -> int:
"""Count workflow-related files in the project."""
workflow_patterns = [
'*.yml', '*.yaml', # GitHub Actions, Docker Compose
'*.json', # n8n workflows, package.json
'workflow*', 'Workflow*',
'*workflow*'
]
count = 0
for pattern in workflow_patterns:
count += len(list(project_path.rglob(pattern)))
return min(count, 20) # Cap at reasonable number
def _format_project_name(self, project_id: str) -> str:
"""Format project directory name into a readable project name."""
# Convert kebab-case and snake_case to Title Case
name = project_id.replace('-', ' ').replace('_', ' ')
return ' '.join(word.capitalize() for word in name.split())
def _get_directory_size(self, path: Path) -> int:
"""Get total size of directory in bytes."""
total_size = 0
try:
for file_path in path.rglob("*"):
if file_path.is_file():
total_size += file_path.stat().st_size
except Exception:
pass
return total_size
def get_project_metrics(self, project_id: str) -> Optional[Dict[str, Any]]:
"""Get detailed metrics for a project."""
project_path = self.projects_base_path / project_id
if not project_path.exists():
return None
# Get GitHub issues count if repo exists
github_repo = None
git_config_path = project_path / ".git" / "config"
if git_config_path.exists():
github_repo = self._extract_github_repo(git_config_path)
github_issues = 0
github_open_issues = 0
if github_repo and self.github_token:
try:
issues_data = self._get_github_issues(github_repo)
github_issues = len(issues_data)
github_open_issues = len([i for i in issues_data if i['state'] == 'open'])
except Exception:
pass
# Count workflows
workflow_count = self._count_workflows(project_path)
# Analyze TODO file
todos_path = project_path / "TODOS.md"
completed_tasks = 0
total_tasks = 0
if todos_path.exists():
todos_content = todos_path.read_text()
# Count checkboxes
total_tasks = len(re.findall(r'- \[[ x]\]', todos_content))
completed_tasks = len(re.findall(r'- \[x\]', todos_content))
# Get last activity
last_activity = None
try:
latest_file = None
latest_time = 0
for file_path in project_path.rglob("*"):
if file_path.is_file():
mtime = file_path.stat().st_mtime
if mtime > latest_time:
latest_time = mtime
latest_file = file_path
if latest_file:
last_activity = datetime.fromtimestamp(latest_time).isoformat()
except Exception:
pass
return {
"total_workflows": workflow_count,
"active_workflows": max(0, workflow_count - 1) if workflow_count > 0 else 0,
"total_tasks": total_tasks,
"completed_tasks": completed_tasks,
"github_issues": github_issues,
"github_open_issues": github_open_issues,
"task_completion_rate": completed_tasks / total_tasks if total_tasks > 0 else 0,
"last_activity": last_activity
}
def _get_github_issues(self, repo: str) -> List[Dict]:
"""Fetch GitHub issues for a repository."""
if not self.github_token:
return []
try:
url = f"{self.github_api_base}/repos/{repo}/issues"
headers = {
"Authorization": f"token {self.github_token}",
"Accept": "application/vnd.github.v3+json"
}
response = requests.get(url, headers=headers, timeout=10)
if response.status_code == 200:
return response.json()
except Exception as e:
print(f"Error fetching GitHub issues for {repo}: {e}")
return []
def get_project_tasks(self, project_id: str) -> List[Dict[str, Any]]:
"""Get tasks for a project (from GitHub issues and TODOS.md)."""
tasks = []
# Get GitHub issues
project_path = self.projects_base_path / project_id
git_config_path = project_path / ".git" / "config"
if git_config_path.exists():
github_repo = self._extract_github_repo(git_config_path)
if github_repo:
github_issues = self._get_github_issues(github_repo)
for issue in github_issues:
tasks.append({
"id": f"gh-{issue['number']}",
"title": issue['title'],
"description": issue.get('body', ''),
"status": "open" if issue['state'] == 'open' else "closed",
"type": "github_issue",
"created_at": issue['created_at'],
"updated_at": issue['updated_at'],
"url": issue['html_url'],
"labels": [label['name'] for label in issue.get('labels', [])]
})
# Get TODOS from TODOS.md
todos_path = project_path / "TODOS.md"
if todos_path.exists():
todos_content = todos_path.read_text()
todo_items = self._parse_todos_markdown(todos_content)
tasks.extend(todo_items)
return tasks
def _parse_todos_markdown(self, content: str) -> List[Dict[str, Any]]:
"""Parse TODOS.md content into structured tasks."""
tasks = []
lines = content.split('\n')
for i, line in enumerate(lines):
line = line.strip()
# Look for checkbox items
checkbox_match = re.match(r'- \[([x ])\]\s*(.+)', line)
if checkbox_match:
is_completed = checkbox_match.group(1) == 'x'
task_text = checkbox_match.group(2)
tasks.append({
"id": f"todo-{i}",
"title": task_text,
"description": "",
"status": "completed" if is_completed else "open",
"type": "todo",
"created_at": None,
"updated_at": None,
"url": None,
"labels": []
})
return tasks
# === Bzzz Integration Methods ===
def get_bzzz_active_repositories(self) -> List[Dict[str, Any]]:
    """Get list of repositories enabled for Bzzz consumption from database.

    Queries the projects table for rows with bzzz_enabled = true and a
    non-null git_url, then checks GitHub for open 'bzzz-task' issues on
    each to compute the live ready_to_claim flag. Any database failure
    falls back to a filesystem scan.

    NOTE(review): connection parameters (host/user/password) are hard-coded
    here — they should come from configuration or a secret store.
    """
    # psycopg2 is imported lazily so the module loads without the driver.
    import psycopg2
    from psycopg2.extras import RealDictCursor
    active_repos = []
    try:
        print("DEBUG: Attempting to connect to database...")
        # Connect to database
        conn = psycopg2.connect(
            host="postgres",
            port=5432,
            database="hive",
            user="hive",
            password="hivepass"
        )
        print("DEBUG: Database connection successful")
        with conn.cursor(cursor_factory=RealDictCursor) as cursor:
            # Query projects where bzzz_enabled is true
            print("DEBUG: Executing query for bzzz-enabled projects...")
            cursor.execute("""
                SELECT id, name, description, git_url, git_owner, git_repository,
                       git_branch, bzzz_enabled, ready_to_claim, private_repo, github_token_required
                FROM projects
                WHERE bzzz_enabled = true AND git_url IS NOT NULL
            """)
            db_projects = cursor.fetchall()
            print(f"DEBUG: Found {len(db_projects)} bzzz-enabled projects in database")
            for project in db_projects:
                print(f"DEBUG: Processing project {project['name']} (ID: {project['id']})")
                # For each enabled project, check if it has bzzz-task issues
                project_id = project['id']
                github_repo = f"{project['git_owner']}/{project['git_repository']}"
                print(f"DEBUG: Checking GitHub repo: {github_repo}")
                # Check for bzzz-task issues
                bzzz_tasks = self._get_github_bzzz_tasks(github_repo)
                has_tasks = len(bzzz_tasks) > 0
                print(f"DEBUG: Found {len(bzzz_tasks)} bzzz-task issues, has_tasks={has_tasks}")
                active_repos.append({
                    "project_id": project_id,
                    "name": project['name'],
                    "git_url": project['git_url'],
                    "owner": project['git_owner'],
                    "repository": project['git_repository'],
                    # Default to "main" when no branch is recorded.
                    "branch": project['git_branch'] or "main",
                    "bzzz_enabled": project['bzzz_enabled'],
                    # Live GitHub state overrides the stored ready_to_claim flag.
                    "ready_to_claim": has_tasks,
                    "private_repo": project['private_repo'],
                    "github_token_required": project['github_token_required']
                })
        conn.close()
        print(f"DEBUG: Returning {len(active_repos)} active repositories")
    except Exception as e:
        print(f"Error fetching bzzz active repositories: {e}")
        import traceback
        print(f"DEBUG: Exception traceback: {traceback.format_exc()}")
        # Fallback to filesystem method if database fails
        return self._get_bzzz_active_repositories_filesystem()
    return active_repos
def _get_github_bzzz_tasks(self, github_repo: str) -> List[Dict[str, Any]]:
"""Fetch GitHub issues with bzzz-task label for a repository."""
if not self.github_token:
return []
try:
url = f"{self.github_api_base}/repos/{github_repo}/issues"
headers = {
"Authorization": f"token {self.github_token}",
"Accept": "application/vnd.github.v3+json"
}
params = {
"labels": "bzzz-task",
"state": "open"
}
response = requests.get(url, headers=headers, params=params, timeout=10)
if response.status_code == 200:
return response.json()
except Exception as e:
print(f"Error fetching bzzz-task issues for {github_repo}: {e}")
return []
def _get_bzzz_active_repositories_filesystem(self) -> List[Dict[str, Any]]:
"""Fallback method using filesystem scan for bzzz repositories."""
active_repos = []
# Get all projects and filter for those with GitHub repos
all_projects = self.get_all_projects()
for project in all_projects:
github_repo = project.get('github_repo')
if not github_repo:
continue
# Check if project has bzzz-task issues (indicating Bzzz readiness)
project_id = project['id']
bzzz_tasks = self.get_bzzz_project_tasks(project_id)
# Only include projects that have bzzz-task labeled issues
if bzzz_tasks:
# Parse GitHub repo URL
repo_parts = github_repo.split('/')
if len(repo_parts) >= 2:
owner = repo_parts[0]
repository = repo_parts[1]
active_repos.append({
"project_id": hash(project_id) % 1000000, # Simple numeric ID for compatibility
"name": project['name'],
"git_url": f"https://github.com/{github_repo}",
"owner": owner,
"repository": repository,
"branch": "main", # Default branch
"bzzz_enabled": True,
"ready_to_claim": len(bzzz_tasks) > 0,
"private_repo": False, # TODO: Detect from GitHub API
"github_token_required": False # TODO: Implement token requirement logic
})
return active_repos
def get_bzzz_project_tasks(self, project_id: str) -> List[Dict[str, Any]]:
"""Get GitHub issues with bzzz-task label for a specific project."""
project_path = self.projects_base_path / project_id
if not project_path.exists():
return []
# Get GitHub repository
git_config_path = project_path / ".git" / "config"
if not git_config_path.exists():
return []
github_repo = self._extract_github_repo(git_config_path)
if not github_repo:
return []
# Fetch issues with bzzz-task label
if not self.github_token:
return []
try:
url = f"{self.github_api_base}/repos/{github_repo}/issues"
headers = {
"Authorization": f"token {self.github_token}",
"Accept": "application/vnd.github.v3+json"
}
params = {
"labels": "bzzz-task",
"state": "open"
}
response = requests.get(url, headers=headers, params=params, timeout=10)
if response.status_code == 200:
issues = response.json()
# Convert to Bzzz format
bzzz_tasks = []
for issue in issues:
# Check if already claimed (has assignee)
is_claimed = bool(issue.get('assignees'))
bzzz_tasks.append({
"number": issue['number'],
"title": issue['title'],
"description": issue.get('body', ''),
"state": issue['state'],
"labels": [label['name'] for label in issue.get('labels', [])],
"created_at": issue['created_at'],
"updated_at": issue['updated_at'],
"html_url": issue['html_url'],
"is_claimed": is_claimed,
"assignees": [assignee['login'] for assignee in issue.get('assignees', [])],
"task_type": self._determine_task_type(issue)
})
return bzzz_tasks
except Exception as e:
print(f"Error fetching bzzz-task issues for {github_repo}: {e}")
return []
def _determine_task_type(self, issue: Dict) -> str:
"""Determine the task type from GitHub issue labels and content."""
labels = [label['name'].lower() for label in issue.get('labels', [])]
title_lower = issue['title'].lower()
body_lower = (issue.get('body') or '').lower()
# Map common labels to task types
type_mappings = {
'bug': ['bug', 'error', 'fix'],
'feature': ['feature', 'enhancement', 'new'],
'documentation': ['docs', 'documentation', 'readme'],
'refactor': ['refactor', 'cleanup', 'optimization'],
'testing': ['test', 'testing', 'qa'],
'infrastructure': ['infra', 'deployment', 'devops', 'ci/cd'],
'security': ['security', 'vulnerability', 'auth'],
'ui/ux': ['ui', 'ux', 'frontend', 'design']
}
for task_type, keywords in type_mappings.items():
if any(keyword in labels for keyword in keywords) or \
any(keyword in title_lower for keyword in keywords) or \
any(keyword in body_lower for keyword in keywords):
return task_type
return 'general'
def claim_bzzz_task(self, project_id: str, task_number: int, agent_id: str) -> str:
    """Record an agent's claim on a task and return the claim id.

    Currently log-only; persistence and GitHub assignee updates are TODO.
    """
    claim_id = "-".join((project_id, str(task_number), agent_id))
    print(f"Bzzz task claimed: Project {project_id}, Task #{task_number}, Agent {agent_id}")
    # TODO: Store claim in database with timestamp
    # TODO: Update GitHub issue assignee if GitHub token has write access
    return claim_id
def update_bzzz_task_status(self, project_id: str, task_number: int, status: str, metadata: Dict[str, Any]) -> None:
    """Log a task status change; escalations get an extra log line.

    Persistence, GitHub sync and the N8N escalation webhook are all TODO.
    """
    print(f"Bzzz task status update: Project {project_id}, Task #{task_number}, Status: {status}")
    print(f"Metadata: {metadata}")
    # TODO: Store status update in database
    # TODO: Update GitHub issue status/comments if applicable
    if status == "escalated":
        # Humans take over from here.
        print(f"Task escalated for human review: {metadata}")
        # TODO: Trigger N8N webhook for human escalation
def update_project(self, project_id: str, project_data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
"""Update a project configuration."""
try:
# For now, projects are read-only from the filesystem
# This could be extended to update project metadata files
project = self.get_project_by_id(project_id)
if not project:
return None
# Update project metadata in a local JSON file if needed
# For now, just return the existing project as projects are filesystem-based
print(f"Project update request for {project_id}: {project_data}")
return project
except Exception as e:
print(f"Error updating project {project_id}: {e}")
return None
def create_project(self, project_data: Dict[str, Any]) -> Dict[str, Any]:
    """Create a new project record (stub).

    Projects are filesystem-based and read-only for now, so no directory is
    created; a placeholder record is returned whose id is the slugified
    name. Errors are logged and re-raised.

    Fix vs. previous version: the clock is read once, so created_at and
    updated_at are guaranteed equal for a freshly created record
    (datetime.now() was previously called twice).
    """
    try:
        # For now, projects are filesystem-based and read-only.
        # This could be extended to create new project directories.
        print(f"Project creation request: {project_data}")
        # Return a mock project for now; id is the lowercased, hyphenated name.
        project_id = project_data.get("name", "new-project").lower().replace(" ", "-")
        timestamp = datetime.now().isoformat()  # single clock read → consistent fields
        return {
            "id": project_id,
            "name": project_data.get("name", "New Project"),
            "description": project_data.get("description", ""),
            "status": "created",
            "created_at": timestamp,
            "updated_at": timestamp,
        }
    except Exception as e:
        print(f"Error creating project: {e}")
        raise
def delete_project(self, project_id: str) -> bool:
"""Delete a project."""
try:
# For now, projects are filesystem-based and read-only
# This could be extended to archive or remove project directories
project = self.get_project_by_id(project_id)
if not project:
return False
print(f"Project deletion request for {project_id}")
# Return success for now (projects are read-only)
return True
except Exception as e:
print(f"Error deleting project {project_id}: {e}")
return False