| field | value |
|---|---|
| author | Paul Buetow <paul@buetow.org>, 2026-05-01 15:10:20 +0300 |
| committer | Paul Buetow <paul@buetow.org>, 2026-05-01 15:10:20 +0300 |
| commit | e5cd3949c477308709f0052bbd0fae68a970bfa7 |
| tree | c065f59f5976943874ba1a90b63c7decae1fe494 |
| parent | c9f3180fda2c07e5a27b1a99e74c5e9ec83b4222 |
hexai: switch default Ollama Cloud model to gemma4:31b-cloud
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | hexai/config.toml | 9 |
1 file changed, 4 insertions, 5 deletions
diff --git a/hexai/config.toml b/hexai/config.toml
index 388faf2..1a6dff1 100644
--- a/hexai/config.toml
+++ b/hexai/config.toml
@@ -16,14 +16,13 @@ slashcommand_dir = "~/.cursor/commands"
 name = "ollama"
 
 [ollama]
-model = "kimi-k2.6"
+model = "gemma4:31b-cloud"
 base_url = "https://ollama.com"
 temperature = 0.2
 
-# In-code auto-completion uses gemma4:31b (the dense larger Gemma 4 on Ollama
-# Cloud). It's faster and tighter than kimi-k2.6 for short, latency-sensitive
-# completions while everything else still defaults to kimi-k2.6 above.
+# In-code auto-completion uses gemma4:31b-cloud (the dense Gemma 4 hosted on
+# Ollama Cloud). Latency-sensitive completions use the same model as chat.
 [[models.completion]]
 provider = "ollama"
-model = "gemma4:31b"
+model = "gemma4:31b-cloud"
 temperature = 0.2
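For reference, this is how the affected portion of hexai/config.toml should read after the patch. It is a reconstruction from the hunk above, not the full file: lines outside the hunk are omitted, and the placement of blank lines is inferred from the hunk's line counts.

```toml
[ollama]
model = "gemma4:31b-cloud"
base_url = "https://ollama.com"
temperature = 0.2

# In-code auto-completion uses gemma4:31b-cloud (the dense Gemma 4 hosted on
# Ollama Cloud). Latency-sensitive completions use the same model as chat.
[[models.completion]]
provider = "ollama"
model = "gemma4:31b-cloud"
temperature = 0.2
```

After this change, the default chat model and the [[models.completion]] override both point at the same gemma4:31b-cloud identifier, so chat and in-code completion are served by a single hosted model rather than two differently named ones.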
