<!-- CURSOR_SUMMARY --> > [!NOTE] > Adds a context-limit banner with one-click “summarize into new chat,” refactors token counting with react-query, and persists per-message max token usage. > > - **Chat UX** > - **Context limit banner** (`ContextLimitBanner.tsx`, `MessagesList.tsx`): shows when within 40k tokens of `contextWindow`, with tooltip and action to summarize into a new chat. > - **Summarize flow**: extracted to `useSummarizeInNewChat` and used in chat input and banner; new summarize system prompt (`summarize_chat_system_prompt.ts`). > - **Token usage & counting** > - **Persist max tokens used per assistant message**: DB migration (`messages.max_tokens_used`), schema updates, and saving usage during streaming (`chat_stream_handlers.ts`). > - **Token counting refactor** (`useCountTokens.ts`): react-query with debounce; returns `estimatedTotalTokens` and `actualMaxTokens`; invalidated on model change and stream end; `TokenBar` updated. > - **Surfacing usage**: tooltip on latest assistant message shows total tokens (`ChatMessage.tsx`). > - **Model/config tweaks** > - Set `auto` model `contextWindow` to `200_000` (`language_model_constants.ts`). > - Improve chat auto-scroll dependency (`ChatPanel.tsx`). > - Fix app path validation regex (`app_handlers.ts`). > - **Testing & dev server** > - E2E tests for banner and summarize (`e2e-tests/context_limit_banner.spec.ts` + fixtures/snapshot). > - Fake LLM server streams usage to simulate high token scenarios (`testing/fake-llm-server/*`). > > <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 2ae16a14d50699cc772407426419192c2fdf2ec3. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup> <!-- /CURSOR_SUMMARY --> <!-- This is an auto-generated description by cubic. --> --- ## Summary by cubic Adds a “Summarize into new chat” trigger and a context limit banner to help keep conversations focused and avoid hitting model limits. 
Also tracks and surfaces actual token usage per assistant message, with a token counting refactor for reliability. - **New Features** - Summarize into new chat from the input or banner; improved system prompt with clear output format. - Context limit banner shows when within 40k tokens of the model’s context window and offers a one-click summarize action. - Tooltip on the latest assistant message shows total tokens used. - **Refactors** - Token counting now uses react-query and returns estimatedTotalTokens and actualMaxTokens; counts are invalidated on model change and when streaming settles. - Persist per-message max_tokens_used in the messages table; backend aggregates model usage during streaming and saves it. - Adjusted default “Auto” model contextWindow to 200k for more realistic limits. - Improved chat scrolling while streaming; fixed app path validation regex. <sup>Written for commit 2ae16a14d50699cc772407426419192c2fdf2ec3. Summary will update automatically on new commits.</sup> <!-- End of auto-generated description by cubic. -->
446 lines
13 KiB
TypeScript
446 lines
13 KiB
TypeScript
import { Request, Response } from "express";
|
|
import fs from "fs";
|
|
import path from "path";
|
|
import { CANNED_MESSAGE, createStreamChunk } from ".";
|
|
|
|
// Mutable counter shared across all requests handled by this process.
// Incremented by the "[increment]" control message below so tests can
// observe that server-side state persists between calls.
let globalCounter = 0;
|
|
|
|
export const createChatCompletionHandler =
|
|
(prefix: string) => async (req: Request, res: Response) => {
|
|
const { stream = false, messages = [] } = req.body;
|
|
console.log("* Received messages", messages);
|
|
|
|
// Check if the last message contains "[429]" to simulate rate limiting
|
|
const lastMessage = messages[messages.length - 1];
|
|
if (lastMessage && lastMessage.content === "[429]") {
|
|
return res.status(429).json({
|
|
error: {
|
|
message: "Too many requests. Please try again later.",
|
|
type: "rate_limit_error",
|
|
param: null,
|
|
code: "rate_limit_exceeded",
|
|
},
|
|
});
|
|
}
|
|
|
|
let messageContent = CANNED_MESSAGE;
|
|
|
|
if (
|
|
lastMessage &&
|
|
Array.isArray(lastMessage.content) &&
|
|
lastMessage.content.some(
|
|
(part: { type: string; text: string }) =>
|
|
part.type === "text" &&
|
|
part.text.includes("[[UPLOAD_IMAGE_TO_CODEBASE]]"),
|
|
)
|
|
) {
|
|
messageContent = `Uploading image to codebase
|
|
<dyad-write path="new/image/file.png" description="Uploaded image to codebase">
|
|
DYAD_ATTACHMENT_0
|
|
</dyad-write>
|
|
`;
|
|
messageContent += "\n\n" + generateDump(req);
|
|
}
|
|
|
|
if (
|
|
lastMessage &&
|
|
typeof lastMessage.content === "string" &&
|
|
lastMessage.content.includes("[sleep=medium]")
|
|
) {
|
|
await new Promise((resolve) => setTimeout(resolve, 10_000));
|
|
}
|
|
|
|
// TS auto-fix prefixes
|
|
if (
|
|
lastMessage &&
|
|
typeof lastMessage.content === "string" &&
|
|
lastMessage.content.startsWith(
|
|
"Fix these 2 TypeScript compile-time error",
|
|
)
|
|
) {
|
|
// Fix errors in create-ts-errors.md and introduce a new error
|
|
messageContent = `
|
|
<dyad-write path="src/bad-file.ts" description="Fix 2 errors and introduce a new error.">
|
|
// Import doesn't exist
|
|
// import NonExistentClass from 'non-existent-class';
|
|
|
|
|
|
const x = new Object();
|
|
x.nonExistentMethod2();
|
|
</dyad-write>
|
|
|
|
`;
|
|
}
|
|
if (
|
|
lastMessage &&
|
|
typeof lastMessage.content === "string" &&
|
|
lastMessage.content.startsWith(
|
|
"Fix these 1 TypeScript compile-time error",
|
|
)
|
|
) {
|
|
// Fix errors in create-ts-errors.md and introduce a new error
|
|
messageContent = `
|
|
<dyad-write path="src/bad-file.ts" description="Fix remaining error.">
|
|
// Import doesn't exist
|
|
// import NonExistentClass from 'non-existent-class';
|
|
|
|
|
|
const x = new Object();
|
|
x.toString(); // replaced with existing method
|
|
</dyad-write>
|
|
|
|
`;
|
|
}
|
|
|
|
if (
|
|
lastMessage &&
|
|
typeof lastMessage.content === "string" &&
|
|
lastMessage.content.includes("TypeScript compile-time error")
|
|
) {
|
|
messageContent += "\n\n" + generateDump(req);
|
|
}
|
|
if (
|
|
lastMessage &&
|
|
typeof lastMessage.content === "string" &&
|
|
lastMessage.content.startsWith("Fix error: Error Line 6 error")
|
|
) {
|
|
messageContent = `
|
|
Fixing the error...
|
|
<dyad-write path="src/pages/Index.tsx">
|
|
|
|
|
|
import { MadeWithDyad } from "@/components/made-with-dyad";
|
|
|
|
const Index = () => {
|
|
return (
|
|
<div className="min-h-screen flex items-center justify-center bg-gray-100">
|
|
<div className="text-center">
|
|
<h1 className="text-4xl font-bold mb-4">No more errors!</h1>
|
|
</div>
|
|
<MadeWithDyad />
|
|
</div>
|
|
);
|
|
};
|
|
|
|
export default Index;
|
|
|
|
</dyad-write>
|
|
`;
|
|
}
|
|
if (
|
|
lastMessage &&
|
|
typeof lastMessage.content === "string" &&
|
|
lastMessage.content.startsWith(
|
|
"There was an issue with the following `dyad-search-replace` tags.",
|
|
)
|
|
) {
|
|
if (lastMessage.content.includes("Make sure you use `dyad-read`")) {
|
|
// Fix errors in create-ts-errors.md and introduce a new error
|
|
messageContent =
|
|
`
|
|
<dyad-read path="src/pages/Index.tsx"></dyad-read>
|
|
|
|
<dyad-search-replace path="src/pages/Index.tsx">
|
|
<<<<<<< SEARCH
|
|
// STILL Intentionally DO NOT MATCH ANYTHING TO TRIGGER FALLBACK
|
|
<h1 className="text-4xl font-bold mb-4">Welcome to Your Blank App</h1>
|
|
=======
|
|
<h1 className="text-4xl font-bold mb-4">Welcome to the UPDATED App</h1>
|
|
>>>>>>> REPLACE
|
|
</dyad-search-replace>
|
|
` +
|
|
"\n\n" +
|
|
generateDump(req);
|
|
} else {
|
|
// Fix errors in create-ts-errors.md and introduce a new error
|
|
messageContent =
|
|
`
|
|
<dyad-write path="src/pages/Index.tsx" description="Rewrite file.">
|
|
// FILE IS REPLACED WITH FALLBACK WRITE.
|
|
</dyad-write>` +
|
|
"\n\n" +
|
|
generateDump(req);
|
|
}
|
|
}
|
|
|
|
console.error("LASTMESSAGE", lastMessage);
|
|
// Check if the last message is "[dump]" to write messages to file and return path
|
|
if (
|
|
lastMessage &&
|
|
(Array.isArray(lastMessage.content)
|
|
? lastMessage.content.some(
|
|
(part: { type: string; text: string }) =>
|
|
part.type === "text" && part.text.includes("[dump]"),
|
|
)
|
|
: lastMessage.content.includes("[dump]"))
|
|
) {
|
|
messageContent = generateDump(req);
|
|
}
|
|
|
|
if (
|
|
lastMessage &&
|
|
typeof lastMessage.content === "string" &&
|
|
lastMessage.content.startsWith("/security-review")
|
|
) {
|
|
messageContent = fs.readFileSync(
|
|
path.join(
|
|
__dirname,
|
|
"..",
|
|
"..",
|
|
"..",
|
|
"e2e-tests",
|
|
"fixtures",
|
|
"security-review",
|
|
"findings.md",
|
|
),
|
|
"utf-8",
|
|
);
|
|
messageContent += "\n\n" + generateDump(req);
|
|
}
|
|
|
|
if (lastMessage && lastMessage.content === "[increment]") {
|
|
globalCounter++;
|
|
messageContent = `counter=${globalCounter}`;
|
|
}
|
|
|
|
// Check if the last message starts with "tc=" to load test case file
|
|
if (
|
|
lastMessage &&
|
|
lastMessage.content &&
|
|
typeof lastMessage.content === "string" &&
|
|
lastMessage.content.startsWith("tc=")
|
|
) {
|
|
const testCaseName = lastMessage.content.slice(3).split("[")[0].trim(); // Remove "tc=" prefix
|
|
console.error(`* Loading test case: ${testCaseName}`);
|
|
const testFilePath = path.join(
|
|
__dirname,
|
|
"..",
|
|
"..",
|
|
"..",
|
|
"e2e-tests",
|
|
"fixtures",
|
|
prefix,
|
|
`${testCaseName}.md`,
|
|
);
|
|
|
|
try {
|
|
if (fs.existsSync(testFilePath)) {
|
|
messageContent = fs.readFileSync(testFilePath, "utf-8");
|
|
console.log(`* Loaded test case: ${testCaseName}`);
|
|
} else {
|
|
console.error(`* Test case file not found: ${testFilePath}`);
|
|
messageContent = `Error: Test case file not found: ${testCaseName}.md`;
|
|
}
|
|
} catch (error) {
|
|
console.error(`* Error reading test case file: ${error}`);
|
|
messageContent = `Error: Could not read test case file: ${testCaseName}.md`;
|
|
}
|
|
}
|
|
|
|
if (
|
|
lastMessage &&
|
|
lastMessage.content &&
|
|
typeof lastMessage.content === "string" &&
|
|
lastMessage.content.trim().endsWith("[[STRING_TO_BE_FINISHED]]")
|
|
) {
|
|
messageContent = `[[STRING_IS_FINISHED]]";</dyad-write>\nFinished writing file.`;
|
|
messageContent += "\n\n" + generateDump(req);
|
|
}
|
|
const isToolCall = !!(
|
|
lastMessage &&
|
|
lastMessage.content &&
|
|
lastMessage.content.includes("[call_tool=calculator_add]")
|
|
);
|
|
let message = {
|
|
role: "assistant",
|
|
content: messageContent,
|
|
} as any;
|
|
|
|
// Non-streaming response
|
|
if (!stream) {
|
|
if (isToolCall) {
|
|
const toolCallId = `call_${Date.now()}`;
|
|
return res.json({
|
|
id: `chatcmpl-${Date.now()}`,
|
|
object: "chat.completion",
|
|
created: Math.floor(Date.now() / 1000),
|
|
model: "fake-model",
|
|
choices: [
|
|
{
|
|
index: 0,
|
|
message: {
|
|
role: "assistant",
|
|
tool_calls: [
|
|
{
|
|
id: toolCallId,
|
|
type: "function",
|
|
function: {
|
|
name: "calculator_add",
|
|
arguments: JSON.stringify({ a: 1, b: 2 }),
|
|
},
|
|
},
|
|
],
|
|
},
|
|
finish_reason: "tool_calls",
|
|
},
|
|
],
|
|
});
|
|
}
|
|
return res.json({
|
|
id: `chatcmpl-${Date.now()}`,
|
|
object: "chat.completion",
|
|
created: Math.floor(Date.now() / 1000),
|
|
model: "fake-model",
|
|
choices: [
|
|
{
|
|
index: 0,
|
|
message,
|
|
finish_reason: "stop",
|
|
},
|
|
],
|
|
});
|
|
}
|
|
|
|
// Streaming response
|
|
res.setHeader("Content-Type", "text/event-stream");
|
|
res.setHeader("Cache-Control", "no-cache");
|
|
res.setHeader("Connection", "keep-alive");
|
|
|
|
// Tool call streaming (OpenAI-style)
|
|
if (isToolCall) {
|
|
const now = Date.now();
|
|
const mkChunk = (delta: any, finish: null | string = null) => {
|
|
const chunk = {
|
|
id: `chatcmpl-${now}`,
|
|
object: "chat.completion.chunk",
|
|
created: Math.floor(now / 1000),
|
|
model: "fake-model",
|
|
choices: [
|
|
{
|
|
index: 0,
|
|
delta,
|
|
finish_reason: finish,
|
|
},
|
|
],
|
|
};
|
|
return `data: ${JSON.stringify(chunk)}\n\n`;
|
|
};
|
|
|
|
// 1) Send role
|
|
res.write(mkChunk({ role: "assistant" }));
|
|
|
|
// 2) Send tool_calls init with id + name + empty args
|
|
const toolCallId = `call_${now}`;
|
|
res.write(
|
|
mkChunk({
|
|
tool_calls: [
|
|
{
|
|
index: 0,
|
|
id: toolCallId,
|
|
type: "function",
|
|
function: {
|
|
name: "testing-mcp-server__calculator_add",
|
|
arguments: "",
|
|
},
|
|
},
|
|
],
|
|
}),
|
|
);
|
|
|
|
// 3) Stream arguments gradually
|
|
const args = JSON.stringify({ a: 1, b: 2 });
|
|
let i = 0;
|
|
const argBatchSize = 6;
|
|
const argInterval = setInterval(() => {
|
|
if (i < args.length) {
|
|
const part = args.slice(i, i + argBatchSize);
|
|
i += argBatchSize;
|
|
res.write(
|
|
mkChunk({
|
|
tool_calls: [{ index: 0, function: { arguments: part } }],
|
|
}),
|
|
);
|
|
} else {
|
|
// 4) Finalize with finish_reason tool_calls and [DONE]
|
|
res.write(mkChunk({}, "tool_calls"));
|
|
res.write("data: [DONE]\n\n");
|
|
clearInterval(argInterval);
|
|
res.end();
|
|
}
|
|
}, 10);
|
|
return;
|
|
}
|
|
|
|
// Check for high token usage marker to simulate near context limit
|
|
const highTokensMatch =
|
|
typeof lastMessage?.content === "string" &&
|
|
!lastMessage?.content.startsWith("Summarize the following chat:") &&
|
|
lastMessage?.content?.match?.(/\[high-tokens=(\d+)\]/);
|
|
const highTokensValue = highTokensMatch
|
|
? parseInt(highTokensMatch[1], 10)
|
|
: null;
|
|
|
|
// Split the message into characters to simulate streaming
|
|
const messageChars = messageContent.split("");
|
|
|
|
// Stream each character with a delay
|
|
let index = 0;
|
|
const batchSize = 32;
|
|
|
|
// Send role first
|
|
res.write(createStreamChunk("", "assistant"));
|
|
|
|
const interval = setInterval(() => {
|
|
if (index < messageChars.length) {
|
|
// Get the next batch of characters (up to batchSize)
|
|
const batch = messageChars.slice(index, index + batchSize).join("");
|
|
res.write(createStreamChunk(batch));
|
|
index += batchSize;
|
|
} else {
|
|
// Send the final chunk with optional usage info for high token simulation
|
|
const usage = highTokensValue
|
|
? {
|
|
prompt_tokens: highTokensValue - 100,
|
|
completion_tokens: 100,
|
|
total_tokens: highTokensValue,
|
|
}
|
|
: undefined;
|
|
res.write(createStreamChunk("", "assistant", true, usage));
|
|
clearInterval(interval);
|
|
res.end();
|
|
}
|
|
}, 10);
|
|
};
|
|
|
|
function generateDump(req: Request) {
|
|
const timestamp = Date.now();
|
|
const generatedDir = path.join(__dirname, "generated");
|
|
|
|
// Create generated directory if it doesn't exist
|
|
if (!fs.existsSync(generatedDir)) {
|
|
fs.mkdirSync(generatedDir, { recursive: true });
|
|
}
|
|
|
|
const dumpFilePath = path.join(generatedDir, `${timestamp}.json`);
|
|
|
|
try {
|
|
fs.writeFileSync(
|
|
dumpFilePath,
|
|
JSON.stringify(
|
|
{
|
|
body: req.body,
|
|
headers: { authorization: req.headers["authorization"] },
|
|
},
|
|
null,
|
|
2,
|
|
).replace(/\r\n/g, "\n"),
|
|
"utf-8",
|
|
);
|
|
console.log(`* Dumped messages to: ${dumpFilePath}`);
|
|
return `[[dyad-dump-path=${dumpFilePath}]]`;
|
|
} catch (error) {
|
|
console.error(`* Error writing dump file: ${error}`);
|
|
return `Error: Could not write dump file: ${error}`;
|
|
}
|
|
}
|