summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Buetow <pbuetow@mimecast.com>2026-01-30 12:16:31 +0200
committerPaul Buetow <pbuetow@mimecast.com>2026-01-30 12:16:31 +0200
commitbe127d0d1790ee10eb2c9103f32ab27fb98d4f4d (patch)
treec1a6cdaad61a37bbdd70ddbe161c05aaf13d09ab
parentd3e0edbe16459f07506f70611b639d0a0a7f054e (diff)
feat: add completion_wait_all config and fix Anthropic system messages
- Add completion_wait_all config option (default true) to wait for all backends before returning results, or return first result immediately
- Fix Anthropic API: extract system messages to top-level system field as required by Messages API (was causing 400 errors)
- Add anthropic case to server.go clientFor() for model overrides
-rw-r--r--config.toml.example3
-rw-r--r--internal/appconfig/config.go18
-rw-r--r--internal/hexailsp/run.go1
-rw-r--r--internal/llm/anthropic.go19
-rw-r--r--internal/lsp/handlers_completion.go18
-rw-r--r--internal/lsp/server.go16
6 files changed, 66 insertions, 9 deletions
diff --git a/config.toml.example b/config.toml.example
index 473e48c..ae8110a 100644
--- a/config.toml.example
+++ b/config.toml.example
@@ -20,6 +20,7 @@ log_preview_limit = 100 # chars shown in log previews
completion_debounce_ms = 800 # idle ms before sending a request
completion_throttle_ms = 0 # min ms between requests (0 disables)
manual_invoke_min_prefix = 0 # required identifier chars for manual invoke
+completion_wait_all = true # wait for all backends (true) or return first (false)
[triggers]
trigger_characters = [".", ":", "/", "_", " "]
@@ -37,7 +38,7 @@ chat_prefixes = ["?", "!", ":", ";"] # single-character items
# completion = "gpt-4o-mini"
# chat = "gpt-4.1"
-# Full array form for multiple entries
+# Full array form for multiple entries (completions fan out to all; labels show provider:model).
# [[models.completion]]
# provider = "openai"
# model = "gpt-4o-mini"
diff --git a/internal/appconfig/config.go b/internal/appconfig/config.go
index f41d4d9..78237be 100644
--- a/internal/appconfig/config.go
+++ b/internal/appconfig/config.go
@@ -40,6 +40,10 @@ type App struct {
// Completion throttle in milliseconds. When > 0, caps the minimum spacing
// between LLM requests (both chat and code-completer paths).
CompletionThrottleMs int `json:"completion_throttle_ms" toml:"completion_throttle_ms"`
+ // CompletionWaitAll controls whether to wait for all configured completion
+ // backends before returning results. When true (default), waits for all
+ // backends. When false, returns the first result immediately.
+ CompletionWaitAll *bool `json:"completion_wait_all" toml:"completion_wait_all"`
TriggerCharacters []string `json:"trigger_characters" toml:"trigger_characters"`
Provider string `json:"provider" toml:"provider"`
@@ -259,9 +263,10 @@ type sectionLogging struct {
}
type sectionCompletion struct {
- CompletionDebounceMs int `toml:"completion_debounce_ms"`
- CompletionThrottleMs int `toml:"completion_throttle_ms"`
- ManualInvokeMinPrefix int `toml:"manual_invoke_min_prefix"`
+ CompletionDebounceMs int `toml:"completion_debounce_ms"`
+ CompletionThrottleMs int `toml:"completion_throttle_ms"`
+ ManualInvokeMinPrefix int `toml:"manual_invoke_min_prefix"`
+ CompletionWaitAll *bool `toml:"completion_wait_all"`
}
type sectionTriggers struct {
@@ -425,11 +430,13 @@ func (fc *fileConfig) toApp() App {
}
// completion
- if (fc.Completion != sectionCompletion{}) {
+ if fc.Completion.CompletionDebounceMs != 0 || fc.Completion.CompletionThrottleMs != 0 ||
+ fc.Completion.ManualInvokeMinPrefix != 0 || fc.Completion.CompletionWaitAll != nil {
tmp := App{
CompletionDebounceMs: fc.Completion.CompletionDebounceMs,
CompletionThrottleMs: fc.Completion.CompletionThrottleMs,
ManualInvokeMinPrefix: fc.Completion.ManualInvokeMinPrefix,
+ CompletionWaitAll: fc.Completion.CompletionWaitAll,
}
out.mergeBasics(&tmp)
}
@@ -888,6 +895,9 @@ func (a *App) mergeBasics(other *App) {
if other.CompletionThrottleMs > 0 {
a.CompletionThrottleMs = other.CompletionThrottleMs
}
+ if other.CompletionWaitAll != nil {
+ a.CompletionWaitAll = other.CompletionWaitAll
+ }
if len(other.TriggerCharacters) > 0 {
a.TriggerCharacters = slices.Clone(other.TriggerCharacters)
}
diff --git a/internal/hexailsp/run.go b/internal/hexailsp/run.go
index f39ea96..1afa70a 100644
--- a/internal/hexailsp/run.go
+++ b/internal/hexailsp/run.go
@@ -195,6 +195,7 @@ func makeServerOptions(cfg appconfig.App, logContext bool, client llm.Client, lo
ManualInvokeMinPrefix: cfg.ManualInvokeMinPrefix,
CompletionDebounceMs: cfg.CompletionDebounceMs,
CompletionThrottleMs: cfg.CompletionThrottleMs,
+ CompletionWaitAll: cfg.CompletionWaitAll,
InlineOpen: cfg.InlineOpen,
InlineClose: cfg.InlineClose,
ChatSuffix: cfg.ChatSuffix,
diff --git a/internal/llm/anthropic.go b/internal/llm/anthropic.go
index ebb6826..c0cdc9a 100644
--- a/internal/llm/anthropic.go
+++ b/internal/llm/anthropic.go
@@ -230,8 +230,21 @@ func buildAnthropicChatRequest(o Options, messages []Message, defaultModel strin
Stream: stream,
MaxTokens: 4096, // Anthropic requires max_tokens
}
- req.Messages = make([]anthropicMessage, len(messages))
- for i, m := range messages {
+ // Anthropic requires system messages in a top-level "system" field, not in messages array
+ var systemParts []string
+ var nonSystemMessages []Message
+ for _, m := range messages {
+ if m.Role == "system" {
+ systemParts = append(systemParts, m.Content)
+ } else {
+ nonSystemMessages = append(nonSystemMessages, m)
+ }
+ }
+ if len(systemParts) > 0 {
+ req.System = strings.Join(systemParts, "\n\n")
+ }
+ req.Messages = make([]anthropicMessage, len(nonSystemMessages))
+ for i, m := range nonSystemMessages {
req.Messages[i] = anthropicMessage{
Role: m.Role,
Content: m.Content,
@@ -246,8 +259,6 @@ func buildAnthropicChatRequest(o Options, messages []Message, defaultModel strin
if o.MaxTokens > 0 {
req.MaxTokens = o.MaxTokens
}
- // Note: Anthropic's API doesn't support stop sequences in the same way as OpenAI,
- // but we keep them in the request for future compatibility.
return req
}
diff --git a/internal/lsp/handlers_completion.go b/internal/lsp/handlers_completion.go
index 2fac1f3..28da503 100644
--- a/internal/lsp/handlers_completion.go
+++ b/internal/lsp/handlers_completion.go
@@ -155,6 +155,24 @@ func (s *Server) tryLLMCompletion(p CompletionParams, above, current, below, fun
return res.items, true, false
}
+ waitAll := s.completionWaitAll()
+ if waitAll {
+ // Wait for all backends, return combined results
+ defer end()
+ combined := make([]CompletionItem, 0)
+ for res := range results {
+ if !res.ok || len(res.items) == 0 {
+ continue
+ }
+ combined = append(combined, res.items...)
+ }
+ if len(combined) == 0 {
+ return nil, false, false
+ }
+ return combined, true, false
+ }
+
+ // Return first result immediately, store combined for later
firstCh := make(chan []CompletionItem, 1)
go func(planKey string) {
defer end()
diff --git a/internal/lsp/server.go b/internal/lsp/server.go
index 67e3cab..127b089 100644
--- a/internal/lsp/server.go
+++ b/internal/lsp/server.go
@@ -71,6 +71,7 @@ type ServerOptions struct {
ManualInvokeMinPrefix int
CompletionDebounceMs int
CompletionThrottleMs int
+ CompletionWaitAll *bool
// Inline/chat triggers
InlineOpen string
@@ -160,6 +161,7 @@ func (s *Server) applyOptions(opts ServerOptions) {
s.cfg.ManualInvokeMinPrefix = opts.ManualInvokeMinPrefix
s.cfg.CompletionDebounceMs = opts.CompletionDebounceMs
s.cfg.CompletionThrottleMs = opts.CompletionThrottleMs
+ s.cfg.CompletionWaitAll = opts.CompletionWaitAll
s.cfg.InlineOpen = opts.InlineOpen
s.cfg.InlineClose = opts.InlineClose
s.cfg.ChatSuffix = opts.ChatSuffix
@@ -305,6 +307,12 @@ func (s *Server) clientFor(spec requestSpec) llm.Client {
} else if spec.fallbackModel != "" {
cfg.OllamaModel = spec.fallbackModel
}
+ case "anthropic":
+ if modelOverride != "" {
+ cfg.AnthropicModel = modelOverride
+ } else if spec.fallbackModel != "" {
+ cfg.AnthropicModel = spec.fallbackModel
+ }
}
client, err := newClientForProvider(cfg, provider)
if err != nil {
@@ -451,6 +459,14 @@ func (s *Server) completionThrottle() time.Duration {
return time.Duration(cfg.CompletionThrottleMs) * time.Millisecond
}
+func (s *Server) completionWaitAll() bool {
+ cfg := s.currentConfig()
+ if cfg.CompletionWaitAll == nil {
+ return true // default: wait for all backends
+ }
+ return *cfg.CompletionWaitAll
+}
+
func (s *Server) inlineMarkers() (open string, close string, openChar byte, closeChar byte) {
cfg := s.currentConfig()
open = strings.TrimSpace(cfg.InlineOpen)