diff options
| -rw-r--r-- | pi/agent/extensions/ask-mode/README.md | 84 | ||||
| -rw-r--r-- | pi/agent/extensions/ask-mode/index.ts | 183 | ||||
| -rw-r--r-- | pi/agent/extensions/ask-mode/utils.ts | 94 | ||||
| -rw-r--r-- | pi/agent/extensions/btw/README.md | 48 | ||||
| -rw-r--r-- | pi/agent/extensions/btw/index.ts | 230 | ||||
| -rw-r--r-- | pi/agent/extensions/nemotron-tool-repair/README.md | 87 | ||||
| -rw-r--r-- | pi/agent/extensions/nemotron-tool-repair/index.ts | 480 | ||||
| -rw-r--r-- | pi/agent/extensions/taskwarrior-plan-mode/README.md | 10 | ||||
| -rw-r--r-- | pi/agent/extensions/taskwarrior-plan-mode/index.ts | 29 |
9 files changed, 1245 insertions, 0 deletions
diff --git a/pi/agent/extensions/ask-mode/README.md b/pi/agent/extensions/ask-mode/README.md new file mode 100644 index 0000000..2c0d17c --- /dev/null +++ b/pi/agent/extensions/ask-mode/README.md @@ -0,0 +1,84 @@ +# Ask Mode + +Exploration-only mode for Pi. + +This extension adds a session-scoped `/ask` mode that turns Pi into a read-only +investigation assistant. It is meant for understanding a codebase, debugging, +reading logs, or answering questions without making changes. + +## What It Does + +- `/ask` enters ask mode +- `/ask <prompt>` enters ask mode and immediately sends the prompt +- `/ask-exit` leaves ask mode +- `/ask-status` shows whether ask mode is active +- limits tools to `read`, `bash`, `grep`, `find`, and `ls` +- blocks unsafe bash commands even though `bash` stays enabled +- injects per-turn instructions telling the model to inspect and explain, not implement + +## Usage Flows + +### Flow 1: Enter ask mode first, then explore + +```text +/ask +``` + +Then ask questions naturally: + +```text +Why does VM2 fail to reach readiness on the first create attempt? +``` + +### Flow 2: Enter ask mode and ask immediately + +```text +/ask Compare the fresh-subagent extension behavior with what the README claims. +``` + +### Flow 3: Leave ask mode + +```text +/ask-exit +``` + +That restores the previously active tool set. + +### Flow 4: Check whether you are still in ask mode + +```text +/ask-status +``` + +## Safety Model + +Ask mode is meant for exploration only. 
+ +- `edit` and `write` are removed from the active tool set +- custom tools outside the ask-mode allowlist are blocked +- `bash` remains available, but only for safe read-only commands + +Examples of the kind of bash commands ask mode allows: + +- `rg foo src` +- `git diff` +- `ls -la` +- `sed -n '1,120p' file` +- `curl http://host/...` + +Examples it blocks: + +- `rm` +- `touch` +- `mkdir` +- `git commit` +- `npm install` +- `sudo ...` +- shell redirection that writes files + +## Notes And Limits + +- This is session-scoped and restores on resume if the session was left in ask mode. +- It is intended for investigation, not planning or implementation. +- If you ask for a change while ask mode is active, Pi should explain what would + need to change instead of making the change. diff --git a/pi/agent/extensions/ask-mode/index.ts b/pi/agent/extensions/ask-mode/index.ts new file mode 100644 index 0000000..4f19815 --- /dev/null +++ b/pi/agent/extensions/ask-mode/index.ts @@ -0,0 +1,183 @@ +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import type { TextContent } from "@mariozechner/pi-ai"; +import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent"; +import { isSafeAskModeCommand } from "./utils.js"; + +const ASK_MODE_TOOLS = ["read", "bash", "grep", "find", "ls"]; +const STATE_TYPE = "ask-mode"; +const CONTEXT_TYPE = "ask-mode-context"; + +interface AskModeState { + enabled: boolean; + normalTools: string[]; +} + +function hasAskModeMarker(message: AgentMessage): boolean { + const customMessage = message as AgentMessage & { customType?: string }; + if (customMessage.customType === CONTEXT_TYPE) return true; + + if (message.role !== "user") return false; + if (typeof message.content === "string") return message.content.includes("[ASK MODE ACTIVE]"); + if (!Array.isArray(message.content)) return false; + + return message.content.some( + (block) => block.type === "text" && (block as TextContent).text?.includes("[ASK MODE 
ACTIVE]"), + ); +} + +export default function askModeExtension(pi: ExtensionAPI): void { + let askModeEnabled = false; + let normalTools: string[] = []; + + function persistState(): void { + pi.appendEntry<AskModeState>(STATE_TYPE, { + enabled: askModeEnabled, + normalTools, + }); + } + + function updateStatus(ctx: ExtensionContext): void { + if (!askModeEnabled) { + ctx.ui.setStatus("ask-mode", undefined); + ctx.ui.setWidget("ask-mode", undefined); + return; + } + + ctx.ui.setStatus("ask-mode", ctx.ui.theme.fg("warning", "⏸ ask")); + ctx.ui.setWidget("ask-mode", [ + ctx.ui.theme.fg("warning", "Ask mode"), + "Exploration only", + "Files are read-only", + "Bash is restricted to safe read-only commands", + ]); + } + + function enterAskMode(ctx: ExtensionContext): void { + if (askModeEnabled) { + updateStatus(ctx); + return; + } + + normalTools = pi.getActiveTools(); + askModeEnabled = true; + pi.setActiveTools(ASK_MODE_TOOLS); + ctx.ui.notify(`Ask mode enabled. Tools: ${ASK_MODE_TOOLS.join(", ")}`, "info"); + updateStatus(ctx); + persistState(); + } + + function exitAskMode(ctx: ExtensionContext): void { + if (!askModeEnabled) { + ctx.ui.notify("Ask mode is not active.", "info"); + updateStatus(ctx); + return; + } + + askModeEnabled = false; + pi.setActiveTools(normalTools.length > 0 ? normalTools : ["read", "bash", "edit", "write"]); + ctx.ui.notify("Ask mode disabled. Previous tools restored.", "info"); + updateStatus(ctx); + persistState(); + } + + pi.registerCommand("ask", { + description: "Enter ask mode for exploration-only work. 
Optional prompt sends a question immediately.", + handler: async (args, ctx) => { + const prompt = args.trim(); + enterAskMode(ctx); + if (prompt) { + pi.sendUserMessage(prompt); + if (!ctx.hasUI) { + await ctx.waitForIdle(); + } + } + }, + }); + + pi.registerCommand("ask-exit", { + description: "Leave ask mode and restore the previous tool set", + handler: async (_args, ctx) => exitAskMode(ctx), + }); + + pi.registerCommand("ask-status", { + description: "Show whether ask mode is active", + handler: async (_args, ctx) => { + const message = askModeEnabled + ? `Ask mode active. Tools: ${ASK_MODE_TOOLS.join(", ")}` + : "Ask mode is not active."; + if (!ctx.hasUI) { + process.stdout.write(`${message}\n`); + return; + } + ctx.ui.notify(message, "info"); + }, + }); + + pi.on("tool_call", async (event) => { + if (!askModeEnabled) return; + + if (!ASK_MODE_TOOLS.includes(event.toolName)) { + return { + block: true, + reason: `Ask mode: tool "${event.toolName}" is disabled. Use /ask-exit before modifying files or using other tools.`, + }; + } + + if (event.toolName === "bash") { + const command = String(event.input.command ?? ""); + if (!isSafeAskModeCommand(command)) { + return { + block: true, + reason: `Ask mode: bash command blocked (not recognized as safe read-only exploration).\nCommand: ${command}`, + }; + } + } + }); + + pi.on("context", async (event) => { + if (askModeEnabled) return; + return { + messages: event.messages.filter((message) => !hasAskModeMarker(message as AgentMessage)), + }; + }); + + pi.on("before_agent_start", async () => { + if (!askModeEnabled) return; + + return { + message: { + customType: CONTEXT_TYPE, + content: `[ASK MODE ACTIVE] +You are in ask mode: exploration only. + +Rules: +- Do not modify files. +- Do not use edit or write tools. +- Use read, grep, find, ls, and only safe read-only bash commands. +- Inspect, explain, compare, summarize, and answer questions. 
+- If a requested action would require a file change, say so explicitly instead of doing it. + +Focus on observation and analysis, not implementation.`, + display: false, + }, + }; + }); + + pi.on("session_start", async (_event, ctx) => { + const entries = ctx.sessionManager.getEntries(); + const latestState = entries + .filter((entry: { type: string; customType?: string }) => entry.type === "custom" && entry.customType === STATE_TYPE) + .pop() as { data?: AskModeState } | undefined; + + if (latestState?.data) { + askModeEnabled = latestState.data.enabled ?? askModeEnabled; + normalTools = latestState.data.normalTools ?? normalTools; + } + + if (askModeEnabled) { + pi.setActiveTools(ASK_MODE_TOOLS); + } + + updateStatus(ctx); + }); +} diff --git a/pi/agent/extensions/ask-mode/utils.ts b/pi/agent/extensions/ask-mode/utils.ts new file mode 100644 index 0000000..db8c889 --- /dev/null +++ b/pi/agent/extensions/ask-mode/utils.ts @@ -0,0 +1,94 @@ +const DESTRUCTIVE_PATTERNS = [ + /\brm\b/i, + /\brmdir\b/i, + /\bmv\b/i, + /\bcp\b/i, + /\bmkdir\b/i, + /\btouch\b/i, + /\bchmod\b/i, + /\bchown\b/i, + /\bchgrp\b/i, + /\bln\b/i, + /\btee\b/i, + /\btruncate\b/i, + /\bdd\b/i, + /\bshred\b/i, + /(^|[^<])>(?!>)/, + />>/, + /\bnpm\s+(install|uninstall|update|ci|link|publish)/i, + /\byarn\s+(add|remove|install|publish)/i, + /\bpnpm\s+(add|remove|install|publish)/i, + /\bpip\s+(install|uninstall)/i, + /\bapt(-get)?\s+(install|remove|purge|update|upgrade)/i, + /\bbrew\s+(install|uninstall|upgrade)/i, + /\bgit\s+(add|commit|push|pull|merge|rebase|reset|checkout|branch\s+-[dD]|stash|cherry-pick|revert|tag|init|clone)/i, + /\bsudo\b/i, + /\bsu\b/i, + /\bkill\b/i, + /\bpkill\b/i, + /\bkillall\b/i, + /\breboot\b/i, + /\bshutdown\b/i, + /\bsystemctl\s+(start|stop|restart|enable|disable)/i, + /\bservice\s+\S+\s+(start|stop|restart)/i, + /\b(vim?|nano|emacs|code|subl)\b/i, +]; + +const SAFE_PATTERNS = [ + /^\s*cat\b/, + /^\s*head\b/, + /^\s*tail\b/, + /^\s*less\b/, + /^\s*more\b/, + 
/^\s*grep\b/, + /^\s*find\b/, + /^\s*ls\b/, + /^\s*pwd\b/, + /^\s*echo\b/, + /^\s*printf\b/, + /^\s*wc\b/, + /^\s*sort\b/, + /^\s*uniq\b/, + /^\s*diff\b/, + /^\s*file\b/, + /^\s*stat\b/, + /^\s*du\b/, + /^\s*df\b/, + /^\s*tree\b/, + /^\s*which\b/, + /^\s*whereis\b/, + /^\s*type\b/, + /^\s*env\b/, + /^\s*printenv\b/, + /^\s*uname\b/, + /^\s*whoami\b/, + /^\s*id\b/, + /^\s*date\b/, + /^\s*cal\b/, + /^\s*uptime\b/, + /^\s*ps\b/, + /^\s*top\b/, + /^\s*htop\b/, + /^\s*free\b/, + /^\s*git\s+(status|log|diff|show|branch|remote|config\s+--get)/i, + /^\s*git\s+ls-/i, + /^\s*npm\s+(list|ls|view|info|search|outdated|audit)/i, + /^\s*yarn\s+(list|info|why|audit)/i, + /^\s*node\s+--version/i, + /^\s*python\s+--version/i, + /^\s*curl\s/i, + /^\s*wget\s+-O\s*-/i, + /^\s*jq\b/, + /^\s*sed\s+-n/i, + /^\s*awk\b/, + /^\s*rg\b/, + /^\s*fd\b/, + /^\s*bat\b/, + /^\s*exa\b/, +]; + +export function isSafeAskModeCommand(command: string): boolean { + const isDestructive = DESTRUCTIVE_PATTERNS.some((pattern) => pattern.test(command)); + const isSafe = SAFE_PATTERNS.some((pattern) => pattern.test(command)); + return !isDestructive && isSafe; +} diff --git a/pi/agent/extensions/btw/README.md b/pi/agent/extensions/btw/README.md new file mode 100644 index 0000000..cf39e1c --- /dev/null +++ b/pi/agent/extensions/btw/README.md @@ -0,0 +1,48 @@ +# BTW + +Ephemeral side questions for Pi. + +This extension adds `/btw`, modeled after Claude Code's side-question flow: + +- it uses the current branch conversation as context +- it asks a separate one-shot question with the current model +- it does not add the side question or answer to session history +- it does not expose tools to that side question + +## Command + +- `/btw <question>` + Ask a quick side question without changing the main thread history. + +## Usage Flow + +### Flow 1: Ask a quick side question + +```text +/btw Why did the current taskwarrior loop happen? +``` + +Pi will answer in a temporary overlay. 
Close it with `Esc`, `Enter`, or `Space`. + +### Flow 2: Use it while you are in the middle of another task + +```text +/btw Remind me which file currently owns the SSH host key bootstrap logic. +``` + +This is meant for detours and clarifications. The main conversation stays clean. + +### Flow 3: Use it in non-interactive mode + +```bash +pi --model openai/gpt-4.1 --no-session -p '/btw Reply with exactly BTW_OK' +``` + +In non-interactive mode, the answer is printed directly to stdout. + +## Notes And Limits + +- `/btw` uses the currently selected model. +- The side question gets current branch context, not a fresh context. +- It has no tools. If the answer is not derivable from the supplied context, it should say so. +- It is best for short clarifications, not long implementation work. diff --git a/pi/agent/extensions/btw/index.ts b/pi/agent/extensions/btw/index.ts new file mode 100644 index 0000000..286c52e --- /dev/null +++ b/pi/agent/extensions/btw/index.ts @@ -0,0 +1,230 @@ +import { complete, type Message, type TextContent, type UserMessage } from "@mariozechner/pi-ai"; +import { + BorderedLoader, + convertToLlm, + type ExtensionAPI, + type ExtensionCommandContext, + type SessionEntry, + type Theme, +} from "@mariozechner/pi-coding-agent"; +import { matchesKey, truncateToWidth, visibleWidth } from "@mariozechner/pi-tui"; + +const SYSTEM_PROMPT = `You are answering a side question for the user. + +Rules: +- Use the supplied conversation context if it is relevant. +- Answer the side question directly and concisely. +- Do not use tools. +- Do not invent facts that are not supported by the supplied context. +- If the answer is not available from the supplied conversation context, say so plainly. 
+- Keep the answer short by default unless the user explicitly asks for depth.`; + +function extractResponseText(message: Message): string { + return message.content + .filter((block): block is TextContent => block.type === "text") + .map((block) => block.text) + .join("\n") + .trim(); +} + +function getConversationMessages(ctx: ExtensionCommandContext): Message[] { + const branch = ctx.sessionManager.getBranch(); + return branch + .filter((entry): entry is SessionEntry & { type: "message" } => entry.type === "message") + .map((entry) => entry.message); +} + +function wrapParagraph(text: string, width: number): string[] { + if (width <= 1) return [text]; + if (!text.trim()) return [""]; + + const words = text.split(/\s+/).filter(Boolean); + if (words.length === 0) return [""]; + + const lines: string[] = []; + let current = ""; + + for (const word of words) { + const next = current ? `${current} ${word}` : word; + if (visibleWidth(next) <= width) { + current = next; + continue; + } + + if (current) lines.push(current); + + if (visibleWidth(word) <= width) { + current = word; + continue; + } + + let remainder = word; + while (visibleWidth(remainder) > width) { + lines.push(truncateToWidth(remainder, width, "")); + remainder = remainder.slice(lines[lines.length - 1]!.length); + } + current = remainder; + } + + if (current) lines.push(current); + return lines.length > 0 ? 
lines : [""]; +} + +function wrapText(text: string, width: number): string[] { + return text.split(/\r?\n/).flatMap((line) => wrapParagraph(line, width)); +} + +class BtwOverlay { + constructor( + private readonly theme: Theme, + private readonly question: string, + private readonly answer: string, + private readonly done: () => void, + ) {} + + handleInput(data: string): void { + if (matchesKey(data, "escape") || matchesKey(data, "return") || data === " " || data === "\r") { + this.done(); + } + } + + render(width: number): string[] { + const innerWidth = Math.max(20, width - 2); + const contentWidth = Math.max(10, innerWidth - 2); + const lines: string[] = []; + + const pad = (text: string) => { + const visible = visibleWidth(text); + return text + " ".repeat(Math.max(0, innerWidth - visible)); + }; + + const row = (text = "") => `${this.theme.fg("border", "│")}${pad(text)}${this.theme.fg("border", "│")}`; + const addWrappedSection = (label: string, value: string) => { + lines.push(row(` ${this.theme.fg("accent", label)}`)); + for (const wrapped of wrapText(value || "(no answer)", contentWidth)) { + lines.push(row(` ${wrapped}`)); + } + lines.push(row()); + }; + + lines.push(this.theme.fg("border", `╭${"─".repeat(innerWidth)}╮`)); + lines.push(row(` ${this.theme.fg("accent", "BTW")}${this.theme.fg("muted", " Side question")}`)); + lines.push(row()); + addWrappedSection("Question", this.question); + addWrappedSection("Answer", this.answer || "(no answer)"); + lines.push(row(this.theme.fg("dim", " Esc, Enter, or Space to close"))); + lines.push(this.theme.fg("border", `╰${"─".repeat(innerWidth)}╯`)); + + return lines; + } + + invalidate(): void {} +} + +async function runBtw(question: string, ctx: ExtensionCommandContext): Promise<string> { + if (!ctx.model) { + throw new Error("No model selected."); + } + + const branchMessages = getConversationMessages(ctx); + const llmMessages = convertToLlm(branchMessages); + const apiKey = await 
ctx.modelRegistry.getApiKey(ctx.model); + const userMessage: UserMessage = { + role: "user", + content: [{ type: "text", text: question }], + timestamp: Date.now(), + }; + + const response = await complete( + ctx.model, + { + systemPrompt: SYSTEM_PROMPT, + messages: [...llmMessages, userMessage], + }, + { apiKey }, + ); + + if (response.stopReason === "aborted") { + throw new Error("Cancelled."); + } + + return extractResponseText(response) || "(no answer)"; +} + +export default function btwExtension(pi: ExtensionAPI): void { + pi.registerCommand("btw", { + description: "Ask a quick side question without adding it to the conversation", + handler: async (args, ctx) => { + const question = args.trim(); + if (!question) { + const usage = "Usage: /btw <side question>"; + if (!ctx.hasUI) process.stdout.write(`${usage}\n`); + else ctx.ui.notify(usage, "warning"); + return; + } + + if (!ctx.model) { + const error = "No model selected."; + if (!ctx.hasUI) process.stdout.write(`${error}\n`); + else ctx.ui.notify(error, "error"); + return; + } + + if (!ctx.hasUI) { + try { + const answer = await runBtw(question, ctx); + process.stdout.write(`${answer}\n`); + } catch (error) { + const text = error instanceof Error ? error.message : String(error); + process.stdout.write(`${text}\n`); + } + return; + } + + const answer = await ctx.ui.custom<string | null>( + (tui, theme, _kb, done) => { + const loader = new BorderedLoader(tui, theme, `Asking BTW using ${ctx.model!.id}...`); + loader.onAbort = () => done(null); + + runBtw(question, ctx) + .then(done) + .catch((error) => { + const text = error instanceof Error ? 
error.message : String(error); + done(`BTW failed: ${text}`); + }); + + return loader; + }, + { + overlay: true, + overlayOptions: { + width: "50%", + minWidth: 50, + maxHeight: "80%", + anchor: "right-center", + offsetX: -1, + }, + }, + ); + + if (answer === null) { + ctx.ui.notify("BTW cancelled.", "info"); + return; + } + + await ctx.ui.custom<void>( + (_tui, theme, _kb, done) => new BtwOverlay(theme, question, answer, done), + { + overlay: true, + overlayOptions: { + width: "55%", + minWidth: 56, + maxHeight: "85%", + anchor: "right-center", + offsetX: -1, + }, + }, + ); + }, + }); +} diff --git a/pi/agent/extensions/nemotron-tool-repair/README.md b/pi/agent/extensions/nemotron-tool-repair/README.md new file mode 100644 index 0000000..69fcb27 --- /dev/null +++ b/pi/agent/extensions/nemotron-tool-repair/README.md @@ -0,0 +1,87 @@ +# Nemotron Tool Repair + +Makes Hyperstack Nemotron sessions more reliable inside Pi when tools are +enabled. + +It does two things: + +- adds a Nemotron-specific tool-use hint to the system prompt so the model stops + narrating before acting +- wraps the Hyperstack OpenAI-compatible providers and repairs raw + `<tool_call> ... </tool_call>` text into real Pi tool calls when vLLM misses it + +This keeps your existing model names and startup scripts unchanged. + +## What It Affects + +Only Hyperstack Nemotron models are changed: + +- `hyperstack1/cyankiwi/NVIDIA-Nemotron-3-Super-120B-A12B-AWQ-4bit` +- `hyperstack2/cyankiwi/NVIDIA-Nemotron-3-Super-120B-A12B-AWQ-4bit` + +Other Hyperstack models such as Qwen3 Coder still use the same endpoints and +same model IDs, but they do not go through the Nemotron repair path. + +## Usage Flow + +Start Pi the same way as before: + +```bash +cd /home/paul/git/conf/snippets/hyperstack +./pi-vm1 +``` + +or explicitly: + +```bash +pi --model 'hyperstack1/cyankiwi/NVIDIA-Nemotron-3-Super-120B-A12B-AWQ-4bit' +``` + +Then use Pi normally. There are no new commands for this extension. 
+ +When Nemotron behaves well: + +- Pi receives a normal structured tool call +- the extension stays out of the way + +When Nemotron emits raw XML-like tool text instead: + +- the extension buffers that assistant turn +- parses `<tool_call>`, `<function=...>`, and `<parameter=...>` blocks +- converts them into real Pi tool calls +- hands the repaired assistant message back to the agent loop + +## What The Repair Handles + +The repair path is aimed at outputs shaped like this: + +```text +<tool_call> +<function=bash> +<parameter=command> +pwd +</parameter> +</function> +</tool_call> +``` + +It preserves surrounding text if Nemotron narrated before the tool call. + +## Practical Notes + +- The repair path only runs when tools are active. +- Nemotron tool turns are buffered before they are shown, so those turns may + feel less streaming than Qwen or GPT. +- The extension also disables `strict` in OpenAI-compatible tool schemas for + the Hyperstack providers, which removes the repeated vLLM warning about + ignored `strict` fields. + +## If You Want To Disable It + +Temporarily disable the extension by moving or renaming this directory: + +```text +~/.pi/agent/extensions/nemotron-tool-repair +``` + +Then restart Pi or use `/reload` in an existing Pi session. 
diff --git a/pi/agent/extensions/nemotron-tool-repair/index.ts b/pi/agent/extensions/nemotron-tool-repair/index.ts new file mode 100644 index 0000000..e06609c --- /dev/null +++ b/pi/agent/extensions/nemotron-tool-repair/index.ts @@ -0,0 +1,480 @@ +import { readFileSync } from "node:fs"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import { + createAssistantMessageEventStream, + type AssistantMessage, + type Context, + type Model, + type OpenAICompletionsCompat, + type SimpleStreamOptions, + streamSimpleOpenAICompletions, + type TextContent, + type ThinkingContent, + type Tool, + type ToolCall, + type Usage, +} from "@mariozechner/pi-ai"; +import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent"; + +const CUSTOM_API = "hyperstack-openai-completions-repaired"; +const TARGET_PROVIDERS = new Set(["hyperstack1", "hyperstack2"]); +const NEMOTRON_MODEL_PATTERN = /NVIDIA-Nemotron-3-Super/i; +const MODELS_JSON_PATH = path.resolve( + path.dirname(fileURLToPath(import.meta.url)), + "..", + "..", + "models.json", +); + +const NEMOTRON_TOOL_DISCIPLINE = ` +Additional tool-use discipline for this model: +- If a tool is needed, call it immediately. +- Do not narrate that you are about to call a tool. +- Do not emit example tool-call markup or pseudo-tool syntax for the user to read. +- Emit at most one tool invocation at a time, then wait for the tool result. +- After a tool result, continue from that result instead of restating the plan. 
+`.trim(); + +interface FileModelConfig { + id: string; + name: string; + reasoning: boolean; + input: ("text" | "image")[]; + cost: { input: number; output: number; cacheRead: number; cacheWrite: number }; + contextWindow: number; + maxTokens: number; + compat?: OpenAICompletionsCompat; +} + +interface FileProviderConfig { + baseUrl: string; + apiKey: string; + api?: string; + compat?: OpenAICompletionsCompat; + models: FileModelConfig[]; +} + +interface FileConfig { + providers: Record<string, FileProviderConfig>; +} + +type AssistantBlock = TextContent | ThinkingContent | ToolCall; + +function isNemotronModel(model: Pick<Model<any>, "id"> | undefined): boolean { + return Boolean(model && NEMOTRON_MODEL_PATTERN.test(model.id)); +} + +function withRepairedCompat(compat?: OpenAICompletionsCompat): OpenAICompletionsCompat { + return { + ...(compat || {}), + supportsStrictMode: false, + }; +} + +function loadProviderConfig(): FileConfig { + const raw = readFileSync(MODELS_JSON_PATH, "utf8"); + return JSON.parse(raw) as FileConfig; +} + +function cloneUsage(usage: Usage): Usage { + return { + input: usage.input, + output: usage.output, + cacheRead: usage.cacheRead, + cacheWrite: usage.cacheWrite, + totalTokens: usage.totalTokens, + cost: { + input: usage.cost.input, + output: usage.cost.output, + cacheRead: usage.cost.cacheRead, + cacheWrite: usage.cost.cacheWrite, + total: usage.cost.total, + }, + }; +} + +function cloneBlock(block: AssistantBlock): AssistantBlock { + switch (block.type) { + case "text": + return { ...block }; + case "thinking": + return { ...block }; + case "toolCall": + return { + ...block, + arguments: { ...block.arguments }, + }; + } +} + +function buildStreamingAssistantMessage(source: AssistantMessage): AssistantMessage { + return { + ...source, + content: [], + usage: cloneUsage(source.usage), + }; +} + +function emitAssistantMessage( + stream: ReturnType<typeof createAssistantMessageEventStream>, + source: AssistantMessage, +): void { + const 
output = buildStreamingAssistantMessage(source); + stream.push({ type: "start", partial: output }); + + for (const sourceBlock of source.content) { + if (sourceBlock.type === "text") { + const block: TextContent = { type: "text", text: "" }; + output.content.push(block); + const contentIndex = output.content.length - 1; + stream.push({ type: "text_start", contentIndex, partial: output }); + if (sourceBlock.text) { + block.text = sourceBlock.text; + stream.push({ + type: "text_delta", + contentIndex, + delta: sourceBlock.text, + partial: output, + }); + } + stream.push({ + type: "text_end", + contentIndex, + content: sourceBlock.text, + partial: output, + }); + continue; + } + + if (sourceBlock.type === "thinking") { + const block: ThinkingContent = { + type: "thinking", + thinking: "", + thinkingSignature: sourceBlock.thinkingSignature, + redacted: sourceBlock.redacted, + }; + output.content.push(block); + const contentIndex = output.content.length - 1; + stream.push({ type: "thinking_start", contentIndex, partial: output }); + if (sourceBlock.thinking) { + block.thinking = sourceBlock.thinking; + stream.push({ + type: "thinking_delta", + contentIndex, + delta: sourceBlock.thinking, + partial: output, + }); + } + stream.push({ + type: "thinking_end", + contentIndex, + content: sourceBlock.thinking, + partial: output, + }); + continue; + } + + const block: ToolCall = { + type: "toolCall", + id: sourceBlock.id, + name: sourceBlock.name, + arguments: {}, + thoughtSignature: sourceBlock.thoughtSignature, + }; + output.content.push(block); + const contentIndex = output.content.length - 1; + stream.push({ type: "toolcall_start", contentIndex, partial: output }); + const argsJson = JSON.stringify(sourceBlock.arguments || {}); + if (argsJson && argsJson !== "{}") { + block.arguments = { ...sourceBlock.arguments }; + stream.push({ + type: "toolcall_delta", + contentIndex, + delta: argsJson, + partial: output, + }); + } else { + block.arguments = { ...sourceBlock.arguments 
}; + } + stream.push({ + type: "toolcall_end", + contentIndex, + toolCall: block, + partial: output, + }); + } + + if (source.stopReason === "error" || source.stopReason === "aborted") { + stream.push({ + type: "error", + reason: source.stopReason, + error: { + ...output, + stopReason: source.stopReason, + errorMessage: source.errorMessage, + }, + }); + stream.end(); + return; + } + + stream.push({ + type: "done", + reason: source.stopReason, + message: { + ...output, + stopReason: source.stopReason, + }, + }); + stream.end(); +} + +function mergeAdjacentTextBlocks(blocks: AssistantBlock[]): AssistantBlock[] { + const merged: AssistantBlock[] = []; + + for (const block of blocks) { + const previous = merged[merged.length - 1]; + if (block.type === "text" && previous?.type === "text") { + previous.text += block.text; + continue; + } + merged.push(cloneBlock(block)); + } + + return merged; +} + +function parseToolCallPayload( + payload: string, + runIdPrefix: string, + index: number, + allowedTools: Set<string>, +): ToolCall | undefined { + const functionMatch = payload.match(/<function=([^>\s]+)>/i); + if (!functionMatch) return undefined; + + const toolName = functionMatch[1].trim(); + if (allowedTools.size > 0 && !allowedTools.has(toolName)) return undefined; + + const args: Record<string, string> = {}; + const parameterRegex = /<parameter=([^>\s]+)>\s*([\s\S]*?)\s*<\/parameter>/gi; + let parameterMatch: RegExpExecArray | null; + + while ((parameterMatch = parameterRegex.exec(payload)) !== null) { + const key = parameterMatch[1].trim(); + const value = parameterMatch[2].replace(/\r/g, "").trim(); + args[key] = value; + } + + if (Object.keys(args).length === 0) return undefined; + + return { + type: "toolCall", + id: `${runIdPrefix}-repair-${index}`, + name: toolName, + arguments: args, + }; +} + +export function repairTextBlock( + text: string, + responseId: string | undefined, + allowedTools: Set<string>, +): AssistantBlock[] | undefined { + const toolRegex = 
/<tool_call>\s*([\s\S]*?)<\/tool_call>/gi;
  const repaired: AssistantBlock[] = [];
  let lastIndex = 0;
  let callCount = 0;
  let matched = false;
  let match: RegExpExecArray | null;

  // Walk every <tool_call>…</tool_call> span (global, stateful exec), keeping
  // the surrounding prose as text blocks and converting each payload into a
  // structured tool-call block.
  while ((match = toolRegex.exec(text)) !== null) {
    matched = true;
    const prefix = text.slice(lastIndex, match.index);
    if (prefix) repaired.push({ type: "text", text: prefix });

    callCount += 1;
    const toolCall = parseToolCallPayload(
      match[1],
      responseId || `nemotron-${Date.now()}`,
      callCount,
      allowedTools,
    );
    // One malformed payload aborts the whole repair: emitting a partial set of
    // tool calls would be worse than leaving the raw text untouched.
    if (!toolCall) return undefined;
    repaired.push(toolCall);
    lastIndex = toolRegex.lastIndex;
  }

  if (!matched) return undefined;

  const suffix = text.slice(lastIndex);
  if (suffix) repaired.push({ type: "text", text: suffix });

  return mergeAdjacentTextBlocks(repaired);
}

/**
 * Rewrites an assistant message whose text still contains literal
 * `<tool_call>` markup into one with structured tool-call blocks.
 *
 * @param message The raw assistant message from the provider.
 * @param context Conversation context; `context.tools` defines which tool
 *   names a repaired call is allowed to reference.
 * @returns The repaired message with `stopReason: "toolUse"`, or `undefined`
 *   when no repair applies: the message already carries tool calls, ended in
 *   an error/abort, or no text block yielded a valid repair.
 */
export function repairNemotronAssistantMessage(
  message: AssistantMessage,
  context: Context,
): AssistantMessage | undefined {
  if (message.content.some((block) => block.type === "toolCall")) return undefined;
  if (message.stopReason === "error" || message.stopReason === "aborted") return undefined;

  const allowedTools = new Set((context.tools || []).map((tool: Tool) => tool.name));
  const repairedContent: AssistantBlock[] = [];
  let repaired = false;

  for (const block of message.content) {
    if (block.type !== "text" || !block.text.includes("<tool_call>")) {
      repairedContent.push(cloneBlock(block));
      continue;
    }

    const repairedBlocks = repairTextBlock(block.text, message.responseId, allowedTools);
    if (!repairedBlocks) {
      repairedContent.push(cloneBlock(block));
      continue;
    }

    // FIX: accumulate with ||= instead of plain assignment, so a later text
    // block can never clear the flag set by an earlier successful repair.
    repaired ||= repairedBlocks.some((entry) => entry.type === "toolCall");
    repairedContent.push(...repairedBlocks);
  }

  if (!repaired) return undefined;

  return {
    ...message,
    content: mergeAdjacentTextBlocks(repairedContent),
    stopReason: "toolUse",
  };
}

/**
 * Appends the Nemotron tool-discipline prompt to the system prompt.
 * No-op for non-Nemotron models, tool-less contexts, or when the discipline
 * text is already present (keeps the hint idempotent across retries).
 */
function applyNemotronPromptHints(context: Context, model: Model<any>): Context {
  if (!isNemotronModel(model) || !context.tools || context.tools.length === 0) return context;
  const basePrompt = context.systemPrompt || "";
  if (basePrompt.includes(NEMOTRON_TOOL_DISCIPLINE)) return context;

  return {
    ...context,
    systemPrompt: basePrompt ? `${basePrompt}\n\n${NEMOTRON_TOOL_DISCIPLINE}` : NEMOTRON_TOOL_DISCIPLINE,
  };
}

/**
 * Clones the model onto the plain OpenAI-completions API with repair-aware
 * compat settings, so the inner stream call bypasses this custom API layer.
 */
function createShadowModel(model: Model<any>): Model<"openai-completions"> {
  return {
    ...model,
    api: "openai-completions",
    compat: withRepairedCompat(model.compat as OpenAICompletionsCompat | undefined),
  };
}

/**
 * Streams a completion through the shadow OpenAI-completions path.
 *
 * For Nemotron models with tools, the inner stream is fully buffered (only
 * the terminal `done`/`error` event is consumed; incremental deltas are
 * deliberately dropped) so the final message can be repaired before a single
 * consolidated message is emitted downstream. All other models pass through
 * untouched.
 */
function streamHyperstackRepaired(
  model: Model<typeof CUSTOM_API>,
  context: Context,
  options?: SimpleStreamOptions,
) {
  const shadowModel = createShadowModel(model);
  const preparedContext = applyNemotronPromptHints(context, model);
  // Default Nemotron tool runs to temperature 0 unless the caller chose one;
  // everything else keeps the caller's options as-is.
  const preparedOptions: SimpleStreamOptions = isNemotronModel(model) && preparedContext.tools?.length
    ? { ...options, temperature: options?.temperature ?? 0 }
    : { ...options };

  if (!isNemotronModel(model) || !preparedContext.tools || preparedContext.tools.length === 0) {
    return streamSimpleOpenAICompletions(shadowModel, preparedContext, preparedOptions);
  }

  const stream = createAssistantMessageEventStream();

  // Fire-and-forget: the async pump settles through `stream`, never by
  // rejecting, so the returned stream is safe to consume immediately.
  void (async () => {
    try {
      const inner = streamSimpleOpenAICompletions(shadowModel, preparedContext, preparedOptions);
      let finalMessage: AssistantMessage | undefined;

      for await (const event of inner) {
        if (event.type === "done") {
          finalMessage = event.message;
        } else if (event.type === "error") {
          finalMessage = event.error;
        }
      }

      if (!finalMessage) {
        throw new Error("Nemotron provider returned no final message.");
      }

      // Fall back to the unmodified message when no repair was needed or
      // possible (repairNemotronAssistantMessage returns undefined then).
      const repairedMessage = repairNemotronAssistantMessage(finalMessage, preparedContext) || finalMessage;
      emitAssistantMessage(stream, repairedMessage);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      // Synthesize a zero-usage error envelope so consumers always receive a
      // well-formed start + error pair even when the inner stream blew up.
      const output: AssistantMessage = {
        role: "assistant",
        content: [],
        api: model.api,
        provider: model.provider,
        model: model.id,
        usage: {
          input: 0,
          output: 0,
          cacheRead: 0,
          cacheWrite: 0,
          totalTokens: 0,
          cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
        },
        stopReason: options?.signal?.aborted ? "aborted" : "error",
        errorMessage: message,
        timestamp: Date.now(),
      };

      stream.push({ type: "start", partial: output });
      stream.push({
        type: "error",
        reason: output.stopReason,
        error: output,
      });
      stream.end();
    }
  })();

  return stream;
}

/**
 * Re-registers every configured target provider behind the custom streaming
 * API so Nemotron responses pass through the tool-call repair path. Providers
 * not listed in TARGET_PROVIDERS are left untouched.
 */
function registerHyperstackProviderOverrides(pi: ExtensionAPI): void {
  const fileConfig = loadProviderConfig();

  for (const [providerName, providerConfig] of Object.entries(fileConfig.providers)) {
    if (!TARGET_PROVIDERS.has(providerName)) continue;

    pi.registerProvider(providerName, {
      baseUrl: providerConfig.baseUrl,
      apiKey: providerConfig.apiKey,
      api: CUSTOM_API,
      compat: withRepairedCompat(providerConfig.compat),
      models: providerConfig.models.map((modelConfig) => ({
        ...modelConfig,
        api: CUSTOM_API,
        // Per-model compat wins; fall back to the provider-level compat.
        compat: withRepairedCompat(modelConfig.compat || providerConfig.compat),
      })),
      streamSimple: streamHyperstackRepaired,
    });
  }
}

// Swapped for a live lookup at activation time so the prompt hook can consult
// the active tool list; the permissive default only matters before load.
let piHasTools = () => true;

/**
 * True when the active model is a Nemotron model served by one of the target
 * providers and at least one tool is active.
 */
function shouldAppendNemotronDiscipline(ctx: ExtensionContext): boolean {
  // FIX: the previous `a && ctx.model?.provider && …` chain could evaluate to
  // `undefined` or `""`, violating the declared `boolean` return type under
  // strict checking. Narrow the provider explicitly instead.
  const provider = ctx.model?.provider;
  if (!isNemotronModel(ctx.model) || !provider) return false;
  return TARGET_PROVIDERS.has(provider) && piHasTools();
}

/**
 * Extension entry point: overrides the target providers with the repairing
 * stream and injects the Nemotron tool-discipline prompt before agent runs.
 */
export default function nemotronToolRepairExtension(pi: ExtensionAPI): void {
  piHasTools = () => pi.getActiveTools().length > 0;
  registerHyperstackProviderOverrides(pi);

  pi.on("before_agent_start", async (event, ctx) => {
    if (!shouldAppendNemotronDiscipline(ctx)) return;
    // Don't stack the discipline text if a previous hook already added it.
    if (event.systemPrompt.includes(NEMOTRON_TOOL_DISCIPLINE)) return;
    return {
      systemPrompt: `${event.systemPrompt}\n\n${NEMOTRON_TOOL_DISCIPLINE}`,
    };
  });
}
diff
--git a/pi/agent/extensions/taskwarrior-plan-mode/README.md b/pi/agent/extensions/taskwarrior-plan-mode/README.md index b280bcb..e75c4d5 100644 --- a/pi/agent/extensions/taskwarrior-plan-mode/README.md +++ b/pi/agent/extensions/taskwarrior-plan-mode/README.md @@ -31,6 +31,10 @@ todo list. Show started and `+READY` tasks for the current repo. - `/task-next [run]` Focus the started task, or start the next `+READY` task. +- `/task-exit` + Leave Taskwarrior focus mode. +- `/task-unfocus` + Alias for `/task-exit`. - `/work-on-tasks [strategy] [max]` Kick off the Taskwarrior execution loop aligned to the `taskwarrior-task-management` workflow. @@ -117,6 +121,12 @@ Focus and immediately start execution: /task-next run ``` +Leave focus mode again: + +```text +/task-exit +``` + Run the full repo task loop: ```text diff --git a/pi/agent/extensions/taskwarrior-plan-mode/index.ts b/pi/agent/extensions/taskwarrior-plan-mode/index.ts index 59a223e..0f8b12c 100644 --- a/pi/agent/extensions/taskwarrior-plan-mode/index.ts +++ b/pi/agent/extensions/taskwarrior-plan-mode/index.ts @@ -398,6 +398,21 @@ export default function taskwarriorPlanModeExtension(pi: ExtensionAPI): void { await setPlanModeEnabled(false, ctx); } + async function exitExecutionMode(ctx: ExtensionContext): Promise<void> { + if (!executionMode) { + ctx.ui.notify("Taskwarrior focus mode is not enabled.", "info"); + return; + } + + executionMode = false; + executionTaskUuid = undefined; + repeatedTaskLookups.clear(); + pi.setActiveTools(normalTools); + persistState(); + await updateStatus(ctx); + ctx.ui.notify("Taskwarrior focus mode disabled.", "info"); + } + async function createTasksFromPlan( mode: "sequential" | "independent", ctx: ExtensionContext, @@ -519,6 +534,20 @@ export default function taskwarriorPlanModeExtension(pi: ExtensionAPI): void { }, }); + pi.registerCommand("task-exit", { + description: "Leave Taskwarrior focus mode", + handler: async (_args, ctx) => { + await exitExecutionMode(ctx); + }, + }); + 
+ pi.registerCommand("task-unfocus", { + description: "Alias for /task-exit", + handler: async (_args, ctx) => { + await exitExecutionMode(ctx); + }, + }); + pi.registerCommand("task-update", { description: "Replace a task description: /task-update <selector> :: <new description>", handler: async (args, ctx) => { |
