// CHORUS/pkg/ai/response_parser.go

package ai

import (
	"regexp"
	"strings"
	"time"
)

// ResponseParser extracts actions and artifacts from LLM text responses.
//
// The type has no fields, so it carries no state between calls; all work is
// done by its methods on the response text passed to them.
type ResponseParser struct{}

// NewResponseParser returns a pointer to a freshly allocated ResponseParser.
// The parser has no configuration, so no arguments are required.
func NewResponseParser() *ResponseParser {
	return new(ResponseParser)
}

// ParseResponse converts free-form LLM response text into structured results.
//
// Every file block found by extractFileBlocks yields one "file_create" action
// and one matching "file" artifact. Every command found by extractCommands
// yields one "command_run" action targeting "shell". File actions come first
// in the returned action slice, followed by the command actions. Both returned
// slices are nil when nothing is recognised in the response.
func (rp *ResponseParser) ParseResponse(response string) ([]TaskAction, []Artifact) {
	var (
		actions   []TaskAction
		artifacts []Artifact
	)

	// Files: each block is recorded both as an action and as an artifact.
	for _, fb := range rp.extractFileBlocks(response) {
		actions = append(actions, TaskAction{
			Type:      "file_create",
			Target:    fb.Filename,
			Content:   fb.Content,
			Result:    "File created from LLM response",
			Success:   true,
			Timestamp: time.Now(),
			Metadata: map[string]interface{}{
				"language": fb.Language,
			},
		})

		artifacts = append(artifacts, Artifact{
			Name:      fb.Filename,
			Type:      "file",
			Path:      fb.Filename,
			Content:   fb.Content,
			Size:      int64(len(fb.Content)), // size in bytes of the trimmed content
			CreatedAt: time.Now(),
		})
	}

	// Shell commands: one action per extracted command line, no artifact.
	for _, command := range rp.extractCommands(response) {
		actions = append(actions, TaskAction{
			Type:      "command_run",
			Target:    "shell",
			Content:   command,
			Result:    "Command extracted from LLM response",
			Success:   true,
			Timestamp: time.Now(),
		})
	}

	return actions, artifacts
}

// FileBlock represents a code block with filename, as recovered from an LLM
// response by extractFileBlocks.
type FileBlock struct {
	// Filename is the file name or path captured from the response, with
	// surrounding whitespace trimmed.
	Filename string
	// Language is either the tag written on the code fence (may be empty) or,
	// for formats without a fence tag, the result of detectLanguage.
	Language string
	// Content is the body of the block with surrounding whitespace trimmed.
	Content string
}

// Patterns recognised by extractFileBlocks. They are compiled once at package
// initialisation rather than on every call.
var (
	// fencedFileCommentRe matches a markdown code block whose first line is a
	// filename comment:
	//
	//	```language
	//	// filename: path/to/file.ext
	//	content
	//	```
	//
	// The comment may start with "//" or "#" and use "filename", "file" or
	// "path" as the key. Groups: 1 = fence language (optional), 2 = filename,
	// 3 = content.
	fencedFileCommentRe = regexp.MustCompile("(?s)```(\\w+)?\\s*\\n(?://|#)\\s*(?:filename|file|path):\\s*([^\\n]+)\\n(.*?)```")

	// backtickNameRe matches a filename in inline backticks, followed by prose
	// containing "content" or "code", followed by a code block, e.g.
	//
	//	Create `script.py` with this content: ```python ... ```
	//
	// Groups: 1 = filename, 2 = fence language (optional), 3 = content. The
	// content group cannot contain backticks.
	backtickNameRe = regexp.MustCompile("`([^`]+)`[^`]*?(?:content|code)[^`]*?```([a-z]+)?\\s*\\n([^`]+)```")

	// fileHeaderRe matches the header notation
	//
	//	--- filename: path/to/file.ext ---
	//	content
	//	--- end ---
	//
	// Groups: 1 = filename (may carry trailing whitespace), 2 = content.
	fileHeaderRe = regexp.MustCompile("(?s)---\\s*(?:filename|file):\\s*([^\\n]+)\\s*---\\s*\\n(.*?)\\n---\\s*(?:end)?\\s*---")

	// heredocRe matches shell-style file creation
	//
	//	cat > filename.ext << 'EOF'
	//	content
	//	EOF
	//
	// Groups: 1 = filename token (may still carry shell quotes), 2 = content.
	heredocRe = regexp.MustCompile("(?s)cat\\s*>\\s*([^\\s<]+)\\s*<<\\s*['\"]?EOF['\"]?\\s*\\n(.*?)\\nEOF")
)

// extractFileBlocks finds code blocks in response that represent files.
//
// Four notations are recognised (see the pattern variables above). Results
// are grouped by notation in that order, not by position in the response, and
// the same text may be reported more than once if it matches several
// notations. Filename and Content are whitespace-trimmed. Language is the
// fence tag for the two markdown notations and the result of detectLanguage
// on the cleaned filename for the other two. The result is nil when nothing
// matches.
func (rp *ResponseParser) extractFileBlocks(response string) []FileBlock {
	var blocks []FileBlock

	// Notation 1: fenced block with a leading filename comment.
	for _, m := range fencedFileCommentRe.FindAllStringSubmatch(response, -1) {
		blocks = append(blocks, FileBlock{
			Filename: strings.TrimSpace(m[2]),
			Language: m[1],
			Content:  strings.TrimSpace(m[3]),
		})
	}

	// Notation 2: `filename` ... content/code ... fenced block.
	for _, m := range backtickNameRe.FindAllStringSubmatch(response, -1) {
		blocks = append(blocks, FileBlock{
			Filename: strings.TrimSpace(m[1]),
			Language: m[2],
			Content:  strings.TrimSpace(m[3]),
		})
	}

	// Notation 3: "--- filename: x ---" header. The greedy filename capture
	// keeps the space before the closing "---", so the name must be trimmed
	// BEFORE language detection; otherwise the extension never matches.
	for _, m := range fileHeaderRe.FindAllStringSubmatch(response, -1) {
		name := strings.TrimSpace(m[1])
		blocks = append(blocks, FileBlock{
			Filename: name,
			Language: rp.detectLanguage(name),
			Content:  strings.TrimSpace(m[2]),
		})
	}

	// Notation 4: heredoc. Shell quoting around the target (cat > "f.txt") is
	// syntax, not part of the file name, so surrounding quotes are removed.
	for _, m := range heredocRe.FindAllStringSubmatch(response, -1) {
		name := strings.Trim(strings.TrimSpace(m[1]), "\"'")
		blocks = append(blocks, FileBlock{
			Filename: name,
			Language: rp.detectLanguage(name),
			Content:  strings.TrimSpace(m[2]),
		})
	}

	return blocks
}

// shellFenceRe matches markdown code blocks tagged bash, sh or shell and
// captures the block body. Compiled once at package initialisation.
var shellFenceRe = regexp.MustCompile("(?s)```(?:bash|sh|shell)\\s*\\n(.*?)```")

// hasLineContinuation reports whether line ends in an unescaped backslash,
// i.e. an odd number of trailing backslashes.
func hasLineContinuation(line string) bool {
	n := 0
	for i := len(line) - 1; i >= 0 && line[i] == '\\'; i-- {
		n++
	}
	return n%2 == 1
}

// extractCommands extracts shell commands from response.
//
// Only markdown code blocks tagged bash, sh or shell are considered. Within a
// block, blank lines and lines starting with "#" are skipped, and every other
// line becomes one whitespace-trimmed command. A line ending in an unescaped
// backslash is joined with the following line(s) the way a shell would (the
// backslash and the line break are removed), so a command wrapped over several
// lines is returned as a single entry rather than as broken fragments. The
// result is nil when no commands are found.
func (rp *ResponseParser) extractCommands(response string) []string {
	var commands []string

	for _, match := range shellFenceRe.FindAllStringSubmatch(response, -1) {
		var pending strings.Builder // accumulated pieces of a continued command
		continuing := false         // true while the previous line ended in "\"

		for _, raw := range strings.Split(strings.TrimSpace(match[1]), "\n") {
			// Tolerate CRLF input so the continuation check sees the backslash.
			raw = strings.TrimSuffix(raw, "\r")

			// Comments and blank lines are only skipped at the start of a
			// command; inside a continuation they belong to the joined line.
			if !continuing {
				trimmed := strings.TrimSpace(raw)
				if trimmed == "" || strings.HasPrefix(trimmed, "#") {
					continue
				}
			}

			if hasLineContinuation(raw) {
				pending.WriteString(raw[:len(raw)-1])
				continuing = true
				continue
			}

			pending.WriteString(raw)
			if cmd := strings.TrimSpace(pending.String()); cmd != "" {
				commands = append(commands, cmd)
			}
			pending.Reset()
			continuing = false
		}

		// A block that ends in a dangling continuation still yields what was
		// collected so far.
		if cmd := strings.TrimSpace(pending.String()); cmd != "" {
			commands = append(commands, cmd)
		}
	}

	return commands
}

// detectLanguage attempts to detect the language from a filename's extension.
//
// The extension is the text after the last "." in filename, compared
// case-insensitively. Surrounding whitespace (including a trailing carriage
// return) is ignored, so raw regex captures can be passed in directly.
// Unknown extensions and names without a "." yield "text".
func (rp *ResponseParser) detectLanguage(filename string) string {
	name := strings.TrimSpace(filename)

	idx := strings.LastIndex(name, ".")
	if idx == -1 {
		return "text"
	}

	switch ext := strings.ToLower(name[idx+1:]); ext {
	case "go", "java", "cpp", "c", "json", "xml", "html", "css", "sql", "php":
		// The language name is the extension itself.
		return ext
	case "py":
		return "python"
	case "js":
		return "javascript"
	case "ts":
		return "typescript"
	case "rs":
		return "rust"
	case "sh", "bash":
		return "bash"
	case "yaml", "yml":
		return "yaml"
	case "md":
		return "markdown"
	case "rb":
		return "ruby"
	}
	// Covers "txt" as well as every unrecognised extension.
	return "text"
}