Files
CHORUS/pkg/ai/response_parser.go
anthonyrawlins df5ec34b4f feat(execution): Add response parser for LLM artifact extraction
Implements regex-based response parser to extract file creation actions
and artifacts from LLM text responses. Agents can now produce actual
work products (files, PRs) instead of just returning instructions.

Changes:
- pkg/ai/response_parser.go: New parser with 4 extraction patterns
  * Markdown code blocks with filename comments
  * Inline backtick filenames followed by "content:" and code blocks
  * File header notation (--- filename: path/to/file.ext --- ... --- end ---)
  * Shell heredoc syntax (cat > file << EOF)

- pkg/execution/engine.go: Skip sandbox when SandboxType empty/none
  * Prevents Docker container errors during testing
  * Preserves artifacts from AI response without sandbox execution

- pkg/ai/{ollama,resetdata}.go: Integrate response parser
  * Both providers now parse LLM output for extractable artifacts
  * Fallback to task_analysis action if no artifacts found

- internal/runtime/agent_support.go: Fix AI provider initialization
  * Set DefaultProvider in RoleModelMapping (prevents "provider not found")

- prompts/defaults.md: Add Rule O for output format guidance
  * Instructs LLMs to format responses for artifact extraction
  * Provides examples and patterns for file creation/modification
  * Explains pipeline: extraction → workspace → tests → PR → review

Test results:
- Before: 0 artifacts, 0 files generated
- After: 2 artifacts extracted successfully from LLM response
- hello.sh (60 bytes) with correct shell script content

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-11 22:08:08 +11:00

207 lines
5.5 KiB
Go

package ai
import (
"regexp"
"strings"
"time"
)
// ResponseParser turns free-form LLM response text into structured actions
// and artifacts. The type has no fields, so a parser holds no state between
// calls.
type ResponseParser struct{}

// NewResponseParser returns a pointer to a freshly allocated ResponseParser.
func NewResponseParser() *ResponseParser {
	return new(ResponseParser)
}
// ParseResponse converts the text of an LLM response into structured results.
//
// Each file block found by extractFileBlocks produces one "file_create"
// action and one matching "file" artifact; the block's filename is used as
// both the artifact name and its path. Each command found by extractCommands
// then produces one "command_run" action. Command actions are appended after
// all file actions.
//
// Both returned slices are nil when nothing was extracted.
func (rp *ResponseParser) ParseResponse(response string) ([]TaskAction, []Artifact) {
	var (
		actions   []TaskAction
		artifacts []Artifact
	)

	// Files first: one action and one artifact per extracted block.
	for _, fb := range rp.extractFileBlocks(response) {
		actions = append(actions, TaskAction{
			Type:      "file_create",
			Target:    fb.Filename,
			Content:   fb.Content,
			Result:    "File created from LLM response",
			Success:   true,
			Timestamp: time.Now(),
			Metadata: map[string]interface{}{
				"language": fb.Language,
			},
		})

		artifacts = append(artifacts, Artifact{
			Name:      fb.Filename,
			Type:      "file",
			Path:      fb.Filename,
			Content:   fb.Content,
			Size:      int64(len(fb.Content)), // size in bytes of the trimmed content
			CreatedAt: time.Now(),
		})
	}

	// Then shell commands: one action per extracted command line.
	for _, command := range rp.extractCommands(response) {
		actions = append(actions, TaskAction{
			Type:      "command_run",
			Target:    "shell",
			Content:   command,
			Result:    "Command extracted from LLM response",
			Success:   true,
			Timestamp: time.Now(),
		})
	}

	return actions, artifacts
}
// FileBlock represents a code block with filename.
// It is the intermediate result produced by extractFileBlocks and consumed by
// ParseResponse, which turns each block into an action and an artifact.
type FileBlock struct {
// Filename is the path captured from the response text, with surrounding
// whitespace trimmed.
Filename string
// Language is the code-fence language tag when the matching pattern
// captures one (it may be empty), otherwise the result of detectLanguage.
Language string
// Content is the captured file body with leading and trailing whitespace
// trimmed.
Content string
}
// Patterns recognised by extractFileBlocks. They are compiled once at package
// initialisation instead of on every call.
var (
	// fileBlockCommentRe matches a fenced code block whose first line is a
	// "//" or "#" comment naming the file:
	//
	//	```language
	//	// filename: path/to/file.ext
	//	content
	//	```
	//
	// Groups: 1 = language tag (optional), 2 = filename, 3 = content.
	fileBlockCommentRe = regexp.MustCompile("(?s)```(\\w+)?\\s*\\n(?://|#)\\s*(?:filename|file|path):\\s*([^\\n]+)\\n(.*?)```")

	// fileBlockBacktickRe matches a filename in single backticks, followed by
	// text containing "content" or "code", followed by a fenced code block:
	//
	//	Create `script.py` with this content: ```python ... ```
	//
	// Neither the connecting text nor the content may contain a backtick.
	// Groups: 1 = filename, 2 = language tag (optional), 3 = content.
	fileBlockBacktickRe = regexp.MustCompile("`([^`]+)`[^`]*?(?:content|code)[^`]*?```([a-z]+)?\\s*\\n([^`]+)```")

	// fileBlockHeaderRe matches file header notation:
	//
	//	--- filename: path/to/file.ext ---
	//	content
	//	--- end ---
	//
	// Groups: 1 = filename (may carry trailing whitespace), 2 = content.
	fileBlockHeaderRe = regexp.MustCompile("(?s)---\\s*(?:filename|file):\\s*([^\\n]+)\\s*---\\s*\\n(.*?)\\n---\\s*(?:end)?\\s*---")

	// fileBlockHeredocRe matches shell heredoc file creation:
	//
	//	cat > filename.ext << 'EOF'
	//	content
	//	EOF
	//
	// Groups: 1 = filename, 2 = content.
	fileBlockHeredocRe = regexp.MustCompile("(?s)cat\\s*>\\s*([^\\s<]+)\\s*<<\\s*['\"]?EOF['\"]?\\s*\\n(.*?)\\nEOF")
)

// extractFileBlocks finds code blocks in response that represent files.
//
// The four patterns above are applied independently, in the order declared,
// and every match is appended to the result. Text that satisfies more than
// one pattern is therefore reported once per matching pattern. Filenames and
// contents are trimmed of surrounding whitespace.
//
// For the fenced patterns Language is the fence tag (possibly empty); for the
// header and heredoc patterns it is derived from the filename via
// detectLanguage. The result is nil when nothing matches.
func (rp *ResponseParser) extractFileBlocks(response string) []FileBlock {
	var blocks []FileBlock

	// FindAllStringSubmatch always returns one entry per capture group (plus
	// the full match), so the group indexes below are always in range.
	for _, m := range fileBlockCommentRe.FindAllStringSubmatch(response, -1) {
		blocks = append(blocks, FileBlock{
			Filename: strings.TrimSpace(m[2]),
			Language: m[1],
			Content:  strings.TrimSpace(m[3]),
		})
	}

	for _, m := range fileBlockBacktickRe.FindAllStringSubmatch(response, -1) {
		blocks = append(blocks, FileBlock{
			Filename: strings.TrimSpace(m[1]),
			Language: m[2],
			Content:  strings.TrimSpace(m[3]),
		})
	}

	for _, m := range fileBlockHeaderRe.FindAllStringSubmatch(response, -1) {
		// The capture includes any whitespace before the closing "---", so
		// the name must be trimmed before its extension is inspected;
		// otherwise "main.go " would be classified by the extension "go ".
		name := strings.TrimSpace(m[1])
		blocks = append(blocks, FileBlock{
			Filename: name,
			Language: rp.detectLanguage(name),
			Content:  strings.TrimSpace(m[2]),
		})
	}

	for _, m := range fileBlockHeredocRe.FindAllStringSubmatch(response, -1) {
		name := strings.TrimSpace(m[1])
		blocks = append(blocks, FileBlock{
			Filename: name,
			Language: rp.detectLanguage(name),
			Content:  strings.TrimSpace(m[2]),
		})
	}

	return blocks
}
// shellFenceRe matches Markdown code fences tagged bash, sh or shell and
// captures the fenced body. It is compiled once at package initialisation
// instead of on every call.
var shellFenceRe = regexp.MustCompile("(?s)```(?:bash|sh|shell)\\s*\\n(.*?)```")

// extractCommands extracts shell commands from response.
//
// Every fenced block tagged bash, sh or shell is split on newlines. Each line
// is trimmed, and blank lines and lines starting with "#" are dropped; every
// remaining line is returned as its own command, in order of appearance.
// Because the split is purely line-based, multi-line constructs are returned
// one line at a time. The result is nil when no command is found.
func (rp *ResponseParser) extractCommands(response string) []string {
	var commands []string

	// Each match always carries the single capture group at index 1.
	for _, m := range shellFenceRe.FindAllStringSubmatch(response, -1) {
		for _, line := range strings.Split(strings.TrimSpace(m[1]), "\n") {
			line = strings.TrimSpace(line)
			if line == "" || strings.HasPrefix(line, "#") {
				continue // skip blanks and comments
			}
			commands = append(commands, line)
		}
	}

	return commands
}
// detectLanguage attempts to detect a language name from the extension of
// filename.
//
// The extension is the text after the last "." in filename, compared
// case-insensitively. Surrounding whitespace is ignored, so a name such as
// "main.go " is still recognised. Unknown extensions, and names without a
// ".", yield "text".
func (rp *ResponseParser) detectLanguage(filename string) string {
	filename = strings.TrimSpace(filename)

	dot := strings.LastIndex(filename, ".")
	if dot == -1 {
		return "text"
	}

	switch ext := strings.ToLower(filename[dot+1:]); ext {
	case "go", "java", "cpp", "c", "json", "xml", "html", "css", "sql", "php":
		// The language name is the extension itself.
		return ext
	case "py":
		return "python"
	case "js":
		return "javascript"
	case "ts":
		return "typescript"
	case "rs":
		return "rust"
	case "sh", "bash":
		return "bash"
	case "yaml", "yml":
		return "yaml"
	case "md":
		return "markdown"
	case "rb":
		return "ruby"
	default:
		// Covers "txt" as well as every unrecognised extension.
		return "text"
	}
}