// Source: pi-skill/extensions/message-integrity-guard.ts
// Snapshot: 2026-05-25 16:41:08 +07:00 · 399 lines · 14 KiB · TypeScript

/**
* Message Integrity Guard Extension
*
* Prevents the "session-bricking" bug where orphaned tool_result messages
* (tool_results without their matching tool_use in the preceding assistant message)
* cause unrecoverable 400 errors from the Anthropic API:
*
* "unexpected tool_use_id found in tool_result blocks: toolu_XXXX.
* Each tool_result block must have a corresponding tool_use block
* in the previous message."
*
* Root causes this guards against:
* 1. Context compaction cutting between tool_use and tool_result
* 2. Session save/restore losing messages
* 3. Interrupted tool calls leaving partial history
*
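* Strategy:
* - On every LLM call (context event): validate and repair message ordering
* - On compaction (session_before_compact): no intervention; a pair split by
*   the cut point is repaired by the next context event
* - On session restore (session_switch): reset the repair counters; the
*   restored history is validated by the next context event
* - On agent end (agent_end): report a tool_use/tool_result mismatch error
*   that reached the API despite the repairs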
*
* The "context" event is the last line of defense — it fires right before
* messages are sent to the API, so we can catch and fix any corruption
* regardless of how it happened.
*/
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
// ============================================================================
// Types (minimal, matching what we see in the message objects)
// ============================================================================
/**
 * Tool-call content block found inside an assistant message's `content`.
 *
 * The repair pass recognises these blocks by `type === "toolCall"` and pairs
 * them with tool results through `id`.
 */
interface ToolCall {
// Discriminator tested when scanning assistant content blocks.
type: "toolCall";
// Identifier a ToolResultMessage.toolCallId must equal to pair with this call.
id: string;
// Tool name; copied into synthesized results and into repair descriptions.
name: string;
// Not read by the code visible in this file.
arguments: Record<string, any>;
}
/**
 * Assistant turn as seen by this extension.
 *
 * `content` is a list of loosely typed blocks; those with
 * `type === "toolCall"` and a truthy `id` are treated as tool calls that
 * expect a matching tool result.
 */
interface AssistantMessage {
role: "assistant";
content: Array<{ type: string; id?: string; name?: string; [key: string]: any }>;
// Compared against "error" and "aborted" by the repair pass, and against
// "error" by the agent_end handler.
stopReason?: string;
// Tested against the tool_use/tool_result mismatch pattern in the agent_end handler.
errorMessage?: string;
// Any additional fields are accepted and passed through untouched.
[key: string]: any;
}
/**
 * Result of a single tool call, carried as its own message with role
 * "toolResult". The repair pass also builds objects of this shape when it
 * synthesizes a placeholder for a missing result.
 */
interface ToolResultMessage {
role: "toolResult";
// Id of the tool call this result answers; looked up in the tool-call ids of
// the most recent assistant message.
toolCallId: string;
// Used in this file only for repair descriptions and synthesized results.
toolName: string;
content: Array<{ type: string; text?: string; [key: string]: any }>;
// Set to true on every synthesized placeholder result.
isError: boolean;
// Synthesized results use Date.now(); presumably epoch milliseconds for
// real results as well — TODO confirm against the producer.
timestamp: number;
// Any additional fields are accepted and passed through untouched.
[key: string]: any;
}
/**
 * User turn. In the repair pass a user message ends the window in which tool
 * results for the preceding assistant message are accepted.
 */
interface UserMessage {
role: "user";
// Either plain text or a list of typed blocks; the repair pass never inspects it.
content: string | Array<{ type: string; [key: string]: any }>;
timestamp: number;
// Any additional fields are accepted and passed through untouched.
[key: string]: any;
}
/**
 * Any message the guard may encounter. The trailing catch-all arm admits roles
 * other than the three modelled above; the repair pass handles those in its
 * final branch and treats them as interrupting the tool flow.
 */
type Message = AssistantMessage | ToolResultMessage | UserMessage | { role: string; [key: string]: any };
// ============================================================================
// Repair Logic
// ============================================================================
/**
 * Validate and repair tool_use/tool_result pairing in a message array.
 *
 * Rules enforced (matching the Anthropic API contract):
 *   1. A tool_result is kept only if it answers a tool_use of the most recent
 *      assistant message, with no user or non-standard message in between.
 *   2. A tool_result whose tool_use is elsewhere or nowhere (an orphan) is
 *      dropped.
 *   3. Only the first tool_result for a given tool_use is kept; later
 *      duplicates are dropped.
 *   4. A tool_use that never received a tool_result gets a synthesized error
 *      result, inserted before the next non-tool-result message or at the end
 *      of the history.
 *
 * Assistant messages whose stopReason is "error" or "aborted" are kept in the
 * output, but their tool calls are not tracked: results that follow them are
 * treated as orphans and nothing is synthesized for them.
 *
 * The input array is not modified; kept messages are the same object
 * references, placed in a new array.
 *
 * @param messages History in the order it would be sent to the model.
 * @returns `messages` — the repaired history; `repairs` — one human-readable
 *          line per change made (empty when the history was already valid).
 */
function validateAndRepairMessages(messages: Message[]): {
  messages: Message[];
  repairs: string[];
} {
  const repairs: string[] = [];
  const result: Message[] = [];
  // Tool-call ids of the most recent (non-error) assistant message.
  let currentToolUseIds = new Set<string>();
  // Subset of currentToolUseIds that already received a result.
  let satisfiedToolUseIds = new Set<string>();

  for (const msg of messages) {
    if (msg.role === "toolResult") {
      const toolResult = msg as ToolResultMessage;
      const { toolCallId, toolName } = toolResult;

      if (!currentToolUseIds.has(toolCallId)) {
        // ORPHANED tool_result — this is the bug that causes 400 errors.
        // The lookup only decides which explanation is logged; the result is
        // dropped either way.
        const ownerAssistant = findAssistantWithToolUse(result, toolCallId);
        if (ownerAssistant) {
          repairs.push(
            `Removed orphaned tool_result for ${toolName} ` +
              `(tool_use_id: ${toolCallId}) — ` +
              `tool_use was in an earlier assistant message, not the immediately preceding one. ` +
              `This was likely caused by compaction or session restoration.`,
          );
        } else {
          repairs.push(
            `Removed orphaned tool_result for ${toolName} ` +
              `(tool_use_id: ${toolCallId}) — ` +
              `no matching tool_use found anywhere in history. ` +
              `The assistant message was likely lost during compaction or session restore.`,
          );
        }
        continue;
      }

      if (satisfiedToolUseIds.has(toolCallId)) {
        // A second result for the same tool_use is rejected by the API just
        // like an orphan, so keep the first result and drop this one.
        repairs.push(
          `Removed duplicate tool_result for ${toolName} ` +
            `(tool_use_id: ${toolCallId}) — ` +
            `the tool_use already has a result and only one is allowed per tool_use.`,
        );
        continue;
      }

      satisfiedToolUseIds.add(toolCallId);
      result.push(msg);
      continue;
    }

    // Every other message closes the previous assistant turn: tool calls that
    // are still unanswered get a placeholder result before this message.
    if (msg.role === "assistant") {
      synthesizeMissingToolResults(
        result,
        repairs,
        currentToolUseIds,
        satisfiedToolUseIds,
        "[Result lost during session recovery]",
        (toolId: string, toolName: string) =>
          `Synthesized missing tool_result for tool_use ${toolId} (${toolName})`,
      );
    } else if (msg.role === "user") {
      synthesizeMissingToolResults(
        result,
        repairs,
        currentToolUseIds,
        satisfiedToolUseIds,
        "[Result lost — user interrupted]",
        (toolId: string) =>
          `Synthesized missing tool_result for tool_use ${toolId} before user message (interrupted tool call)`,
      );
    } else {
      // compactionSummary, branchSummary, bashExecution, custom, etc.
      // These are converted to user messages by convertToLlm(), so they
      // break tool flow just like user messages.
      synthesizeMissingToolResults(
        result,
        repairs,
        currentToolUseIds,
        satisfiedToolUseIds,
        "[Result lost during session recovery]",
        (toolId: string) =>
          `Synthesized missing tool_result for tool_use ${toolId} before non-standard message`,
      );
    }

    currentToolUseIds = new Set<string>();
    satisfiedToolUseIds = new Set<string>();

    if (msg.role === "assistant") {
      const assistantMsg = msg as AssistantMessage;
      // Error/aborted assistant messages stay in the history, but their tool
      // calls are deliberately not tracked (transform-messages.js also skips
      // such messages; this is defense in depth).
      const failed = assistantMsg.stopReason === "error" || assistantMsg.stopReason === "aborted";
      if (!failed && Array.isArray(assistantMsg.content)) {
        for (const block of assistantMsg.content) {
          if (block.type === "toolCall" && block.id) {
            currentToolUseIds.add(block.id);
          }
        }
      }
    }

    result.push(msg);
  }

  // Final check: unsatisfied tool calls at end of history.
  synthesizeMissingToolResults(
    result,
    repairs,
    currentToolUseIds,
    satisfiedToolUseIds,
    "[Result lost — end of recovered history]",
    (toolId: string) => `Synthesized missing tool_result for tool_use ${toolId} at end of history`,
  );

  return { messages: result, repairs };
}

/**
 * Append an error placeholder result to `result` for every id in `toolUseIds`
 * that is absent from `satisfiedIds`, and record one repair line per placeholder.
 *
 * @param result          Output history being built; placeholders are pushed onto it.
 * @param repairs         Repair log; one entry is pushed per placeholder.
 * @param toolUseIds      Tool-call ids of the most recent assistant message.
 * @param satisfiedIds    Ids that already have a result.
 * @param placeholderText Text content of each synthesized result.
 * @param describeRepair  Builds the repair-log line from the tool-call id and tool name.
 */
function synthesizeMissingToolResults(
  result: Message[],
  repairs: string[],
  toolUseIds: Set<string>,
  satisfiedIds: Set<string>,
  placeholderText: string,
  describeRepair: (toolId: string, toolName: string) => string,
): void {
  const missingIds = [...toolUseIds].filter((toolId: string) => !satisfiedIds.has(toolId));
  if (missingIds.length === 0) return;

  // The assistant that issued these calls is the last one pushed to `result`;
  // placeholders appended below are tool results, so one lookup suffices.
  const owner = findPreviousAssistant(result);

  for (const toolId of missingIds) {
    const toolCall = owner?.content.find(
      (block: any) => block.type === "toolCall" && block.id === toolId,
    ) as ToolCall | undefined;
    const toolName = toolCall?.name ?? "unknown";
    const syntheticResult: ToolResultMessage = {
      role: "toolResult",
      toolCallId: toolId,
      toolName,
      content: [{ type: "text", text: placeholderText }],
      isError: true,
      timestamp: Date.now(),
    };
    result.push(syntheticResult);
    repairs.push(describeRepair(toolId, toolName));
  }
}
/**
 * Return the assistant message closest to the end of `messages`, or
 * `undefined` when the array holds no assistant message.
 */
function findPreviousAssistant(messages: Message[]): AssistantMessage | undefined {
  let cursor = messages.length;
  // Walk backwards so the first hit is the most recent assistant turn.
  while (cursor-- > 0) {
    const candidate = messages[cursor];
    if (candidate.role !== "assistant") {
      continue;
    }
    return candidate as AssistantMessage;
  }
  return undefined;
}
/**
 * Search `messages` from newest to oldest for an assistant message whose
 * content array contains a "toolCall" block with id `toolUseId`.
 * Returns that message, or `undefined` if none matches.
 */
function findAssistantWithToolUse(messages: Message[], toolUseId: string): AssistantMessage | undefined {
  for (let idx = messages.length - 1; idx >= 0; idx -= 1) {
    const candidate = messages[idx];
    if (candidate.role !== "assistant") {
      continue;
    }
    const blocks = (candidate as AssistantMessage).content;
    // Content that is not an array cannot hold tool-call blocks.
    if (!Array.isArray(blocks)) {
      continue;
    }
    const ownsCall = blocks.some(
      (block: { type: string; id?: string }) => block.type === "toolCall" && block.id === toolUseId,
    );
    if (ownsCall) {
      return candidate as AssistantMessage;
    }
  }
  return undefined;
}
// ============================================================================
// Extension Entry Point
// ============================================================================
/**
 * Extension entry point: registers the message-integrity handlers on `pi`.
 *
 * Handlers registered:
 *   - "context": runs validateAndRepairMessages on the outgoing history and
 *     returns the repaired array when anything was changed.
 *   - "session_before_compact": intentionally does nothing; registered so the
 *     hook point exists.
 *   - "session_switch": resets the per-session repair statistics.
 *   - "agent_end": reports a tool_use/tool_result mismatch error found on the
 *     most recent assistant message.
 *
 * @param pi Extension API used to subscribe to the events above.
 */
export default function messageIntegrityGuard(pi: ExtensionAPI) {
  // Upper bound on retained repair descriptions. The same corruption can be
  // repaired again on every LLM call, so an unbounded log would keep growing.
  const MAX_REPAIR_LOG_ENTRIES = 200;
  // Matches the API's 400 error text for a tool_use/tool_result mismatch.
  const TOOL_MISMATCH_ERROR = /unexpected.*tool_use_id|tool_result.*must have.*tool_use/i;

  // Track repair stats for the session
  let totalRepairs = 0;
  let repairLog: string[] = [];

  // ========================================================================
  // PRIMARY DEFENSE: Validate messages before every LLM call
  // ========================================================================
  pi.on("context", async (event) => {
    const { messages, repairs } = validateAndRepairMessages(event.messages);
    if (repairs.length === 0) {
      // No repairs needed — return nothing to pass through unchanged
      return;
    }
    totalRepairs += repairs.length;
    repairLog.push(...repairs);
    if (repairLog.length > MAX_REPAIR_LOG_ENTRIES) {
      // Keep only the newest entries.
      repairLog.splice(0, repairLog.length - MAX_REPAIR_LOG_ENTRIES);
    }
    // Silent self-healing — no console output for routine repairs
    return { messages };
  });

  // ========================================================================
  // COMPACTION DEFENSE: none needed at this hook
  // ========================================================================
  pi.on("session_before_compact", async () => {
    // Compaction is neither inspected, cancelled nor modified here. If the
    // compaction boundary splits a tool_use from its tool_result, the
    // "context" handler above repairs the history on the next LLM call.
    return;
  });

  // ========================================================================
  // SESSION RESTORE DEFENSE: Validate history on session switch
  // ========================================================================
  pi.on("session_switch", async () => {
    // The actual validation happens in the "context" handler on the next
    // LLM call. We just reset our counters here.
    totalRepairs = 0;
    repairLog = [];
  });

  // ========================================================================
  // AGENT END: Check for error patterns that indicate corruption we missed
  // ========================================================================
  pi.on("agent_end", async (event, ctx) => {
    if (!event.messages) return;
    // Look for the telltale 400 error in the last assistant message
    for (let i = event.messages.length - 1; i >= 0; i--) {
      const msg = event.messages[i];
      if (msg.role !== "assistant") continue;
      const assistantMsg = msg as AssistantMessage;
      if (
        assistantMsg.stopReason === "error" &&
        assistantMsg.errorMessage &&
        TOOL_MISMATCH_ERROR.test(assistantMsg.errorMessage)
      ) {
        // This should NEVER happen if our context handler is working.
        // If it does, log it loudly so we can investigate.
        console.error(
          `[message-integrity-guard] CRITICAL: Tool use/result mismatch error ` +
            `detected AFTER our validation! Error: ${assistantMsg.errorMessage}`,
        );
        ctx.ui.notify(
          "⚠️ Tool history corruption detected! The context handler should " +
            "have prevented this. Please report this as a bug. " +
            "Try /compact or /new to recover.",
          "error",
        );
      }
      // Only the most recent assistant message is inspected; older errors in
      // the history would otherwise raise the same notification repeatedly.
      break;
    }
  });
}