package intelligence import ( "crypto/md5" "crypto/rand" "encoding/hex" "encoding/json" "fmt" "io" "math" "os" "path/filepath" "regexp" "sort" "strconv" "strings" "time" slurpContext "chorus/pkg/slurp/context" ) // Utility functions and helper types for the intelligence engine // ContentAnalysisUtils provides utilities for content analysis type ContentAnalysisUtils struct{} // NewContentAnalysisUtils creates new content analysis utilities func NewContentAnalysisUtils() *ContentAnalysisUtils { return &ContentAnalysisUtils{} } // ExtractIdentifiers extracts identifiers from code content func (cau *ContentAnalysisUtils) ExtractIdentifiers(content, language string) (functions, classes, variables []string) { switch strings.ToLower(language) { case "go": return cau.extractGoIdentifiers(content) case "javascript", "typescript": return cau.extractJSIdentifiers(content) case "python": return cau.extractPythonIdentifiers(content) case "java": return cau.extractJavaIdentifiers(content) case "rust": return cau.extractRustIdentifiers(content) default: return cau.extractGenericIdentifiers(content) } } func (cau *ContentAnalysisUtils) extractGoIdentifiers(content string) (functions, classes, variables []string) { // Go function pattern: func FunctionName funcPattern := regexp.MustCompile(`func\s+(\w+)\s*\(`) funcMatches := funcPattern.FindAllStringSubmatch(content, -1) for _, match := range funcMatches { if len(match) > 1 { functions = append(functions, match[1]) } } // Go type/struct pattern: type TypeName struct typePattern := regexp.MustCompile(`type\s+(\w+)\s+struct`) typeMatches := typePattern.FindAllStringSubmatch(content, -1) for _, match := range typeMatches { if len(match) > 1 { classes = append(classes, match[1]) } } // Go variable pattern: var varName or varName := varPattern := regexp.MustCompile(`(?:var\s+(\w+)|(\w+)\s*:=)`) varMatches := varPattern.FindAllStringSubmatch(content, -1) for _, match := range varMatches { if len(match) > 1 && match[1] != "" { variables = append(variables, match[1]) } else if len(match) > 2 && match[2] != "" { variables = append(variables, match[2]) } } return removeDuplicates(functions), removeDuplicates(classes), removeDuplicates(variables) } func (cau *ContentAnalysisUtils) extractJSIdentifiers(content string) (functions, classes, variables []string) { // JavaScript function patterns funcPatterns := []*regexp.Regexp{ regexp.MustCompile(`function\s+(\w+)\s*\(`), regexp.MustCompile(`(\w+)\s*:\s*function\s*\(`), regexp.MustCompile(`const\s+(\w+)\s*=\s*\(`), regexp.MustCompile(`(?:let|var)\s+(\w+)\s*=\s*\(`), } for _, pattern := range funcPatterns { matches := pattern.FindAllStringSubmatch(content, -1) for _, match := range matches { if len(match) > 1 { functions = append(functions, match[1]) } } } // JavaScript class pattern classPattern := regexp.MustCompile(`class\s+(\w+)`) classMatches := classPattern.FindAllStringSubmatch(content, -1) for _, match := range classMatches { if len(match) > 1 { classes = append(classes, match[1]) } } // JavaScript variable patterns varPatterns := []*regexp.Regexp{ regexp.MustCompile(`(?:const|let|var)\s+(\w+)`), } for _, pattern := range varPatterns { matches := pattern.FindAllStringSubmatch(content, -1) for _, match := range matches { if len(match) > 1 { variables = append(variables, match[1]) } } } return removeDuplicates(functions), removeDuplicates(classes), removeDuplicates(variables) } func (cau *ContentAnalysisUtils) extractPythonIdentifiers(content string) (functions, classes, variables []string) { // Python function pattern funcPattern := regexp.MustCompile(`def\s+(\w+)\s*\(`) funcMatches := funcPattern.FindAllStringSubmatch(content, -1) for _, match := range funcMatches { if len(match) > 1 { functions = append(functions, match[1]) } } // Python class pattern classPattern := regexp.MustCompile(`class\s+(\w+)`) classMatches := classPattern.FindAllStringSubmatch(content, -1) for _, match := range classMatches { if len(match) > 1 { classes = append(classes, match[1]) } } // Python variable pattern (simple assignment) varPattern := regexp.MustCompile(`^(\w+)\s*=`) lines := strings.Split(content, "\n") for _, line := range lines { line = strings.TrimSpace(line) if matches := varPattern.FindStringSubmatch(line); matches != nil && len(matches) > 1 { variables = append(variables, matches[1]) } } return removeDuplicates(functions), removeDuplicates(classes), removeDuplicates(variables) } func (cau *ContentAnalysisUtils) extractJavaIdentifiers(content string) (functions, classes, variables []string) { // Java method pattern methodPattern := regexp.MustCompile(`(?:public|private|protected)?\s*(?:static)?\s*\w+\s+(\w+)\s*\(`) methodMatches := methodPattern.FindAllStringSubmatch(content, -1) for _, match := range methodMatches { if len(match) > 1 { functions = append(functions, match[1]) } } // Java class pattern classPattern := regexp.MustCompile(`(?:public|private)?\s*class\s+(\w+)`) classMatches := classPattern.FindAllStringSubmatch(content, -1) for _, match := range classMatches { if len(match) > 1 { classes = append(classes, match[1]) } } // Java field/variable pattern varPattern := regexp.MustCompile(`(?:private|public|protected)?\s*\w+\s+(\w+)\s*[=;]`) varMatches := varPattern.FindAllStringSubmatch(content, -1) for _, match := range varMatches { if len(match) > 1 { variables = append(variables, match[1]) } } return removeDuplicates(functions), removeDuplicates(classes), removeDuplicates(variables) } func (cau *ContentAnalysisUtils) extractRustIdentifiers(content string) (functions, classes, variables []string) { // Rust function pattern funcPattern := regexp.MustCompile(`fn\s+(\w+)\s*\(`) funcMatches := funcPattern.FindAllStringSubmatch(content, -1) for _, match := range funcMatches { if len(match) > 1 { functions = append(functions, match[1]) } } // Rust struct pattern structPattern := regexp.MustCompile(`struct\s+(\w+)`) structMatches := structPattern.FindAllStringSubmatch(content, -1) for _, match := range structMatches { if len(match) > 1 { classes = append(classes, match[1]) } } // Rust variable pattern varPattern := regexp.MustCompile(`let\s+(?:mut\s+)?(\w+)`) varMatches := varPattern.FindAllStringSubmatch(content, -1) for _, match := range varMatches { if len(match) > 1 { variables = append(variables, match[1]) } } return removeDuplicates(functions), removeDuplicates(classes), removeDuplicates(variables) } func (cau *ContentAnalysisUtils) extractGenericIdentifiers(content string) (functions, classes, variables []string) { // Generic patterns for unknown languages words := regexp.MustCompile(`\b[a-zA-Z_]\w*\b`).FindAllString(content, -1) return removeDuplicates(words), []string{}, []string{} } // CalculateComplexity calculates code complexity based on various metrics func (cau *ContentAnalysisUtils) CalculateComplexity(content, language string) float64 { complexity := 0.0 // Lines of code (basic metric) lines := strings.Split(content, "\n") nonEmptyLines := 0 for _, line := range lines { if strings.TrimSpace(line) != "" && !strings.HasPrefix(strings.TrimSpace(line), "//") { nonEmptyLines++ } } // Base complexity from lines of code complexity += float64(nonEmptyLines) * 0.1 // Control flow complexity (if, for, while, switch, etc.) controlFlowPatterns := []*regexp.Regexp{ regexp.MustCompile(`\b(?:if|for|while|switch|case)\b`), regexp.MustCompile(`\b(?:try|catch|finally)\b`), regexp.MustCompile(`\?\s*.*\s*:`), // ternary operator } for _, pattern := range controlFlowPatterns { matches := pattern.FindAllString(content, -1) complexity += float64(len(matches)) * 0.5 } // Function complexity functions, _, _ := cau.ExtractIdentifiers(content, language) complexity += float64(len(functions)) * 0.3 // Nesting level (simple approximation) maxNesting := 0 currentNesting := 0 for _, line := range lines { trimmed := strings.TrimSpace(line) openBraces := strings.Count(trimmed, "{") closeBraces := strings.Count(trimmed, "}") currentNesting += openBraces - closeBraces if currentNesting > maxNesting { maxNesting = currentNesting } } complexity += float64(maxNesting) * 0.2 // Normalize to 0-10 scale return math.Min(10.0, complexity/10.0) } // DetectTechnologies detects technologies used in the content func (cau *ContentAnalysisUtils) DetectTechnologies(content, filename string) []string { technologies := []string{} lowerContent := strings.ToLower(content) ext := strings.ToLower(filepath.Ext(filename)) // Language detection languageMap := map[string][]string{ ".go": {"go", "golang"}, ".py": {"python"}, ".js": {"javascript", "node.js"}, ".jsx": {"javascript", "react", "jsx"}, ".ts": {"typescript"}, ".tsx": {"typescript", "react", "jsx"}, ".java": {"java"}, ".kt": {"kotlin"}, ".rs": {"rust"}, ".cpp": {"c++"}, ".c": {"c"}, ".cs": {"c#", ".net"}, ".php": {"php"}, ".rb": {"ruby"}, ".swift": {"swift"}, ".scala": {"scala"}, ".clj": {"clojure"}, ".hs": {"haskell"}, ".ml": {"ocaml"}, } if langs, exists := languageMap[ext]; exists { technologies = append(technologies, langs...) } // Framework and library detection frameworkPatterns := map[string][]string{ "react": {"import.*react", "from [\"']react[\"']", "<.*/>", "jsx"}, "vue": {"import.*vue", "from [\"']vue[\"']", "