Summarize chat trigger (#1890)

<!-- CURSOR_SUMMARY -->
> [!NOTE]
> Adds a context-limit banner with one-click “summarize into new chat,” refactors token counting with react-query, and persists per-message max token usage.
>
> - **Chat UX**
>   - **Context limit banner** (`ContextLimitBanner.tsx`, `MessagesList.tsx`): shows when within 40k tokens of `contextWindow`, with a tooltip and an action to summarize into a new chat.
>   - **Summarize flow**: extracted to `useSummarizeInNewChat` and used in the chat input and banner; new summarize system prompt (`summarize_chat_system_prompt.ts`).
> - **Token usage & counting**
>   - **Persist max tokens used per assistant message**: DB migration (`messages.max_tokens_used`), schema updates, and saving usage during streaming (`chat_stream_handlers.ts`).
>   - **Token counting refactor** (`useCountTokens.ts`): react-query with debounce; returns `estimatedTotalTokens` and `actualMaxTokens`; invalidated on model change and stream end; `TokenBar` updated.
>   - **Surfacing usage**: tooltip on the latest assistant message shows total tokens (`ChatMessage.tsx`).
> - **Model/config tweaks**
>   - Set the `auto` model `contextWindow` to `200_000` (`language_model_constants.ts`).
>   - Improve the chat auto-scroll dependency array (`ChatPanel.tsx`).
>   - Fix the app path validation regex (`app_handlers.ts`).
> - **Testing & dev server**
>   - E2E tests for the banner and summarize flow (`e2e-tests/context_limit_banner.spec.ts` plus fixtures/snapshot).
>   - Fake LLM server streams usage to simulate high-token scenarios (`testing/fake-llm-server/*`).
>
> <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 2ae16a14d50699cc772407426419192c2fdf2ec3. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->

<!-- This is an auto-generated description by cubic. -->
---
## Summary by cubic
Adds a “Summarize into new chat” trigger and a context limit banner to help keep conversations focused and avoid hitting model limits. Also tracks and surfaces actual token usage per assistant message, with a token counting refactor for reliability.

- **New Features**
  - Summarize into new chat from the input or banner; improved system prompt with a clear output format.
  - Context limit banner appears when within 40k tokens of the model’s context window and offers a one-click summarize action (see the sketch below).
  - Tooltip on the latest assistant message shows total tokens used.
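
A minimal sketch of the banner's gating logic, using the threshold and names from `ContextLimitBanner.tsx` in the diff below (`shouldShowContextLimitBanner` is a hypothetical helper, not part of the PR):

```ts
// Hypothetical helper mirroring the check in ContextLimitBanner.tsx.
const CONTEXT_LIMIT_THRESHOLD = 40_000;

export function shouldShowContextLimitBanner(
  totalTokens: number | null | undefined,
  contextWindow: number | undefined,
): boolean {
  // No banner without both the usage and the limit.
  if (!totalTokens || !contextWindow) return false;
  // Show once fewer than 40k tokens remain in the model's context window.
  return contextWindow - totalTokens <= CONTEXT_LIMIT_THRESHOLD;
}
```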

- **Refactors**
  - Token counting now uses react-query and returns estimatedTotalTokens and actualMaxTokens; counts are invalidated on model change and when streaming settles (see the sketch below).
  - Persist per-message max_tokens_used in the messages table; the backend aggregates model usage during streaming and saves it.
  - Adjusted the default “Auto” model contextWindow to 200k for a more realistic limit.
  - Improved chat scrolling while streaming; fixed the app path validation regex.

<sup>Written for commit 2ae16a14d50699cc772407426419192c2fdf2ec3. Summary will update automatically on new commits.</sup>

<!-- End of auto-generated description by cubic. -->
Commit 6235f7bb9d (parent 90c5805b57) by Will Chen, 2025-12-04 23:00:28 -08:00, committed by GitHub.
24 changed files with 1185 additions and 91 deletions

View File

@@ -0,0 +1 @@
ALTER TABLE `messages` ADD `max_tokens_used` integer;

View File

@@ -0,0 +1,767 @@
{
"version": "6",
"dialect": "sqlite",
"id": "071199d7-dfb5-4681-85b7-228f1de3123a",
"prevId": "c0a49147-ac92-4046-afe8-42f20df9314b",
"tables": {
"apps": {
"name": "apps",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"name": {
"name": "name",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"path": {
"name": "path",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "(unixepoch())"
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "(unixepoch())"
},
"github_org": {
"name": "github_org",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"github_repo": {
"name": "github_repo",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"github_branch": {
"name": "github_branch",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"supabase_project_id": {
"name": "supabase_project_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"supabase_parent_project_id": {
"name": "supabase_parent_project_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"neon_project_id": {
"name": "neon_project_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"neon_development_branch_id": {
"name": "neon_development_branch_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"neon_preview_branch_id": {
"name": "neon_preview_branch_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"vercel_project_id": {
"name": "vercel_project_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"vercel_project_name": {
"name": "vercel_project_name",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"vercel_team_id": {
"name": "vercel_team_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"vercel_deployment_url": {
"name": "vercel_deployment_url",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"install_command": {
"name": "install_command",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"start_command": {
"name": "start_command",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"chat_context": {
"name": "chat_context",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"is_favorite": {
"name": "is_favorite",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "0"
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"chats": {
"name": "chats",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"app_id": {
"name": "app_id",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"initial_commit_hash": {
"name": "initial_commit_hash",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "(unixepoch())"
}
},
"indexes": {},
"foreignKeys": {
"chats_app_id_apps_id_fk": {
"name": "chats_app_id_apps_id_fk",
"tableFrom": "chats",
"tableTo": "apps",
"columnsFrom": [
"app_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"language_model_providers": {
"name": "language_model_providers",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"name": {
"name": "name",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"api_base_url": {
"name": "api_base_url",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"env_var_name": {
"name": "env_var_name",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "(unixepoch())"
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "(unixepoch())"
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"language_models": {
"name": "language_models",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"display_name": {
"name": "display_name",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"api_name": {
"name": "api_name",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"builtin_provider_id": {
"name": "builtin_provider_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"custom_provider_id": {
"name": "custom_provider_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"description": {
"name": "description",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"max_output_tokens": {
"name": "max_output_tokens",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"context_window": {
"name": "context_window",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "(unixepoch())"
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "(unixepoch())"
}
},
"indexes": {},
"foreignKeys": {
"language_models_custom_provider_id_language_model_providers_id_fk": {
"name": "language_models_custom_provider_id_language_model_providers_id_fk",
"tableFrom": "language_models",
"tableTo": "language_model_providers",
"columnsFrom": [
"custom_provider_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"mcp_servers": {
"name": "mcp_servers",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"name": {
"name": "name",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"transport": {
"name": "transport",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"command": {
"name": "command",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"args": {
"name": "args",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"env_json": {
"name": "env_json",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"url": {
"name": "url",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"enabled": {
"name": "enabled",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "0"
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "(unixepoch())"
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "(unixepoch())"
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"mcp_tool_consents": {
"name": "mcp_tool_consents",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"server_id": {
"name": "server_id",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"tool_name": {
"name": "tool_name",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"consent": {
"name": "consent",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "'ask'"
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "(unixepoch())"
}
},
"indexes": {
"uniq_mcp_consent": {
"name": "uniq_mcp_consent",
"columns": [
"server_id",
"tool_name"
],
"isUnique": true
}
},
"foreignKeys": {
"mcp_tool_consents_server_id_mcp_servers_id_fk": {
"name": "mcp_tool_consents_server_id_mcp_servers_id_fk",
"tableFrom": "mcp_tool_consents",
"tableTo": "mcp_servers",
"columnsFrom": [
"server_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"messages": {
"name": "messages",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"chat_id": {
"name": "chat_id",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"role": {
"name": "role",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"content": {
"name": "content",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"approval_state": {
"name": "approval_state",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"source_commit_hash": {
"name": "source_commit_hash",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"commit_hash": {
"name": "commit_hash",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"request_id": {
"name": "request_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"max_tokens_used": {
"name": "max_tokens_used",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "(unixepoch())"
}
},
"indexes": {},
"foreignKeys": {
"messages_chat_id_chats_id_fk": {
"name": "messages_chat_id_chats_id_fk",
"tableFrom": "messages",
"tableTo": "chats",
"columnsFrom": [
"chat_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"prompts": {
"name": "prompts",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"description": {
"name": "description",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"content": {
"name": "content",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "(unixepoch())"
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "(unixepoch())"
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"versions": {
"name": "versions",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"app_id": {
"name": "app_id",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"commit_hash": {
"name": "commit_hash",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"neon_db_timestamp": {
"name": "neon_db_timestamp",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "(unixepoch())"
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "(unixepoch())"
}
},
"indexes": {
"versions_app_commit_unique": {
"name": "versions_app_commit_unique",
"columns": [
"app_id",
"commit_hash"
],
"isUnique": true
}
},
"foreignKeys": {
"versions_app_id_apps_id_fk": {
"name": "versions_app_id_apps_id_fk",
"tableFrom": "versions",
"tableTo": "apps",
"columnsFrom": [
"app_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
}
},
"views": {},
"enums": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
},
"internal": {
"indexes": {}
}
}

View File

@@ -120,6 +120,13 @@
"when": 1762297039106, "when": 1762297039106,
"tag": "0016_petite_thanos", "tag": "0016_petite_thanos",
"breakpoints": true "breakpoints": true
},
{
"idx": 17,
"version": "6",
"when": 1764804624402,
"tag": "0017_sharp_corsair",
"breakpoints": true
} }
] ]
} }

View File

@@ -0,0 +1,46 @@
import { test, Timeout } from "./helpers/test_helper";
import { expect } from "@playwright/test";

test("context limit banner appears and summarize works", async ({ po }) => {
  await po.setUp();

  // Send a message that triggers high token usage (110k tokens)
  // With a default context window of 128k, this leaves only 18k tokens remaining
  // which is below the 40k threshold to show the banner
  await po.sendPrompt("tc=context-limit-response [high-tokens=110000]");

  // Verify the context limit banner appears
  const contextLimitBanner = po.page.getByTestId("context-limit-banner");
  await expect(contextLimitBanner).toBeVisible({ timeout: Timeout.MEDIUM });

  // Verify banner text
  await expect(contextLimitBanner).toContainText(
    "You're close to the context limit for this chat.",
  );

  // Click the summarize button
  await contextLimitBanner
    .getByRole("button", { name: "Summarize into new chat" })
    .click();

  // Wait for the new chat to load and message to complete
  await po.waitForChatCompletion();

  // Snapshot the messages in the new chat
  await po.snapshotMessages();
});

test("context limit banner does not appear when within limit", async ({
  po,
}) => {
  await po.setUp();

  // Send a message with low token usage (50k tokens)
  // With a 128k context window, this leaves 78k tokens remaining
  // which is above the 40k threshold - banner should NOT appear
  await po.sendPrompt("tc=context-limit-response [high-tokens=50000]");

  // Verify the context limit banner does NOT appear
  const contextLimitBanner = po.page.getByTestId("context-limit-banner");
  await expect(contextLimitBanner).not.toBeVisible();
});

View File

@@ -0,0 +1,4 @@
Here is a simple response to test the context limit banner functionality.
This message simulates being close to the model's context window limit.

View File

@@ -0,0 +1,14 @@
- paragraph: Summarize from chat-id=1
- img
- text: file1.txt
- button "Edit":
  - img
- img
- text: file1.txt
- paragraph: More EOM
- button:
  - img
- img
- text: less than a minute ago
- button "Retry":
  - img

View File

@@ -97,7 +97,11 @@ export function ChatPanel({
    const streamCount = chatId ? (streamCountById.get(chatId) ?? 0) : 0;
    console.log("streamCount - scrolling to bottom", streamCount);
    scrollToBottom();
-  }, [chatId, chatId ? (streamCountById.get(chatId) ?? 0) : 0]);
+  }, [
+    chatId,
+    chatId ? (streamCountById.get(chatId) ?? 0) : 0,
+    chatId ? (isStreamingById.get(chatId) ?? false) : false,
+  ]);

  useEffect(() => {
    const container = messagesContainerRef.current;

View File

@@ -27,11 +27,17 @@ import { useSettings } from "@/hooks/useSettings";
import { PriceBadge } from "@/components/PriceBadge";
import { TURBO_MODELS } from "@/ipc/shared/language_model_constants";
import { cn } from "@/lib/utils";
+import { useQueryClient } from "@tanstack/react-query";
+import { TOKEN_COUNT_QUERY_KEY } from "@/hooks/useCountTokens";

export function ModelPicker() {
  const { settings, updateSettings } = useSettings();
+  const queryClient = useQueryClient();

  const onModelSelect = (model: LargeLanguageModel) => {
    updateSettings({ selectedModel: model });
+    // Invalidate token count when model changes since different models have different context windows
+    // (technically they have different tokenizers, but we don't keep track of that).
+    queryClient.invalidateQueries({ queryKey: TOKEN_COUNT_QUERY_KEY });
  };

  const [open, setOpen] = useState(false);

View File

@@ -52,13 +52,14 @@ import {
  TooltipProvider,
  TooltipTrigger,
} from "../ui/tooltip";
-import { useNavigate } from "@tanstack/react-router";
import { useVersions } from "@/hooks/useVersions";
import { useAttachments } from "@/hooks/useAttachments";
import { AttachmentsList } from "./AttachmentsList";
import { DragDropOverlay } from "./DragDropOverlay";
import { FileAttachmentDropdown } from "./FileAttachmentDropdown";
-import { showError, showExtraFilesToast } from "@/lib/toast";
+import { showExtraFilesToast } from "@/lib/toast";
+import { useSummarizeInNewChat } from "./SummarizeInNewChatButton";
import { ChatInputControls } from "../ChatInputControls";
import { ChatErrorBox } from "./ChatErrorBox";
import {
@@ -419,30 +420,10 @@ function SuggestionButton({
}

function SummarizeInNewChatButton() {
-  const chatId = useAtomValue(selectedChatIdAtom);
-  const appId = useAtomValue(selectedAppIdAtom);
-  const { streamMessage } = useStreamChat();
-  const navigate = useNavigate();
-
-  const onClick = async () => {
-    if (!appId) {
-      console.error("No app id found");
-      return;
-    }
-    try {
-      const newChatId = await IpcClient.getInstance().createChat(appId);
-      // navigate to new chat
-      await navigate({ to: "/chat", search: { id: newChatId } });
-      await streamMessage({
-        prompt: "Summarize from chat-id=" + chatId,
-        chatId: newChatId,
-      });
-    } catch (err) {
-      showError(err);
-    }
-  };
+  const { handleSummarize } = useSummarizeInNewChat();

  return (
    <SuggestionButton
-      onClick={onClick}
+      onClick={handleSummarize}
      tooltipText="Creating a new chat makes the AI more focused and efficient"
    >
      Summarize to new chat

View File

@@ -12,6 +12,7 @@ import {
  GitCommit,
  Copy,
  Check,
+  Info,
} from "lucide-react";
import { formatDistanceToNow, format } from "date-fns";
import { useVersions } from "@/hooks/useVersions";
@@ -277,6 +278,20 @@ const ChatMessage = ({ message, isLastMessage }: ChatMessageProps) => {
              </Tooltip>
            </TooltipProvider>
          )}
+          {isLastMessage && message.totalTokens && (
+            <TooltipProvider>
+              <Tooltip>
+                <TooltipTrigger asChild>
+                  <div className="flex items-center space-x-1 px-1 py-0.5">
+                    <Info className="h-3 w-3" />
+                  </div>
+                </TooltipTrigger>
+                <TooltipContent>
+                  Max tokens used: {message.totalTokens.toLocaleString()}
+                </TooltipContent>
+              </Tooltip>
+            </TooltipProvider>
+          )}
        </div>
      )}
    </div>

View File

@@ -0,0 +1,89 @@
import { AlertTriangle, ArrowRight } from "lucide-react";
import { Button } from "@/components/ui/button";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import { useSummarizeInNewChat } from "./SummarizeInNewChatButton";

const CONTEXT_LIMIT_THRESHOLD = 40_000;

interface ContextLimitBannerProps {
  totalTokens?: number | null;
  contextWindow?: number;
}

function formatTokenCount(count: number): string {
  if (count >= 1000) {
    return `${(count / 1000).toFixed(1)}k`.replace(".0k", "k");
  }
  return count.toString();
}

export function ContextLimitBanner({
  totalTokens,
  contextWindow,
}: ContextLimitBannerProps) {
  const { handleSummarize } = useSummarizeInNewChat();

  // Don't show banner if we don't have the necessary data
  if (!totalTokens || !contextWindow) {
    return null;
  }

  // Check if we're within 40k tokens of the context limit
  const tokensRemaining = contextWindow - totalTokens;
  if (tokensRemaining > CONTEXT_LIMIT_THRESHOLD) {
    return null;
  }

  return (
    <div
      className="mx-auto max-w-3xl my-3 p-2 rounded-lg border border-amber-500/30 bg-amber-500/10 flex flex-col gap-2"
      data-testid="context-limit-banner"
    >
      <div className="flex items-center gap-2 text-amber-600 dark:text-amber-400">
        <Tooltip>
          <TooltipTrigger asChild>
            <Button
              variant="ghost"
              size="icon"
              className="h-5 w-5 p-0 hover:bg-transparent text-amber-600 dark:text-amber-400 cursor-help"
            >
              <AlertTriangle className="h-4 w-4 shrink-0" />
            </Button>
          </TooltipTrigger>
          <TooltipContent className="w-auto p-2 text-xs" side="top">
            <div className="grid gap-1">
              <div className="flex justify-between gap-4">
                <span>Used:</span>
                <span className="font-medium">
                  {formatTokenCount(totalTokens)}
                </span>
              </div>
              <div className="flex justify-between gap-4">
                <span>Limit:</span>
                <span className="font-medium">
                  {formatTokenCount(contextWindow)}
                </span>
              </div>
            </div>
          </TooltipContent>
        </Tooltip>
        <p className="text-sm font-medium">
          You're close to the context limit for this chat.
        </p>
      </div>
      <Button
        onClick={handleSummarize}
        variant="outline"
        size="sm"
        className="h-8 border-amber-500/50 hover:bg-amber-500/20 hover:border-amber-500 text-amber-600 dark:text-amber-400"
      >
        Summarize into new chat
        <ArrowRight className="h-3 w-3 ml-2" />
      </Button>
    </div>
  );
}
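
As a quick sanity check, formatTokenCount as written above behaves like this (worked examples, not part of the diff):

formatTokenCount(999);     // "999"
formatTokenCount(40_000);  // "40k"  — "40.0k" collapses via .replace(".0k", "k")
formatTokenCount(41_500);  // "41.5k"
formatTokenCount(200_000); // "200k"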

View File

@@ -18,6 +18,8 @@ import { useLanguageModelProviders } from "@/hooks/useLanguageModelProviders";
import { useSettings } from "@/hooks/useSettings";
import { useUserBudgetInfo } from "@/hooks/useUserBudgetInfo";
import { PromoMessage } from "./PromoMessage";
+import { ContextLimitBanner } from "./ContextLimitBanner";
+import { useCountTokens } from "@/hooks/useCountTokens";

interface MessagesListProps {
  messages: Message[];
@@ -36,6 +38,11 @@ export const MessagesList = forwardRef<HTMLDivElement, MessagesListProps>(
    const [isRetryLoading, setIsRetryLoading] = useState(false);
    const selectedChatId = useAtomValue(selectedChatIdAtom);
    const { userBudget } = useUserBudgetInfo();
+    // Only fetch token count when not streaming
+    const { result: tokenCountResult } = useCountTokens(
+      !isStreaming ? selectedChatId : null,
+      "",
+    );

    const renderSetupBanner = () => {
      const selectedModel = settings?.selectedModel;
@@ -73,6 +80,13 @@ export const MessagesList = forwardRef<HTMLDivElement, MessagesListProps>(
              </div>
            </div>
          )}
+          {/* Show context limit banner when close to token limit */}
+          {!isStreaming && tokenCountResult && (
+            <ContextLimitBanner
+              totalTokens={tokenCountResult.actualMaxTokens}
+              contextWindow={tokenCountResult.contextWindow}
+            />
+          )}
          {!isStreaming && (
            <div className="flex max-w-3xl mx-auto gap-2">
              {!!messages.length &&

View File

@@ -0,0 +1,38 @@
import { useNavigate } from "@tanstack/react-router";
import { useAtomValue } from "jotai";
import { selectedChatIdAtom } from "@/atoms/chatAtoms";
import { selectedAppIdAtom } from "@/atoms/appAtoms";
import { useStreamChat } from "@/hooks/useStreamChat";
import { IpcClient } from "@/ipc/ipc_client";
import { showError } from "@/lib/toast";

export function useSummarizeInNewChat() {
  const chatId = useAtomValue(selectedChatIdAtom);
  const appId = useAtomValue(selectedAppIdAtom);
  const { streamMessage } = useStreamChat();
  const navigate = useNavigate();

  const handleSummarize = async () => {
    if (!appId) {
      console.error("No app id found");
      return;
    }
    if (!chatId) {
      console.error("No chat id found");
      return;
    }
    try {
      const newChatId = await IpcClient.getInstance().createChat(appId);
      // navigate to new chat
      await navigate({ to: "/chat", search: { id: newChatId } });
      await streamMessage({
        prompt: "Summarize from chat-id=" + chatId,
        chatId: newChatId,
      });
    } catch (err) {
      showError(err);
    }
  };

  return { handleSummarize };
}

View File

@@ -1,4 +1,4 @@
-import React, { useEffect, useState } from "react";
+import React from "react";
import {
  Tooltip,
  TooltipContent,
@@ -24,31 +24,15 @@ interface TokenBarProps {
export function TokenBar({ chatId }: TokenBarProps) {
  const [inputValue] = useAtom(chatInputValueAtom);
-  const { countTokens, result } = useCountTokens();
-  const [error, setError] = useState<string | null>(null);
  const { settings } = useSettings();
+  const { result, error } = useCountTokens(chatId ?? null, inputValue);

-  useEffect(() => {
-    if (!chatId) return;
-    // Mark this as used, we need to re-trigger token count
-    // when selected model changes.
-    void settings?.selectedModel;
-    const debounceTimer = setTimeout(() => {
-      countTokens(chatId, inputValue).catch((err) => {
-        setError("Failed to count tokens");
-        console.error("Token counting error:", err);
-      });
-    }, 500);
-    return () => clearTimeout(debounceTimer);
-  }, [chatId, inputValue, countTokens, settings?.selectedModel]);

  if (!chatId || !result) {
    return null;
  }

  const {
-    totalTokens,
+    estimatedTotalTokens: totalTokens,
    messageHistoryTokens,
    codebaseTokens,
    mentionedAppsTokens,
@@ -142,7 +126,9 @@ export function TokenBar({ chatId }: TokenBarProps) {
          </TooltipContent>
        </Tooltip>
      </TooltipProvider>
-      {error && <div className="text-red-500 text-xs mt-1">{error}</div>}
+      {error && (
+        <div className="text-red-500 text-xs mt-1">Failed to count tokens</div>
+      )}
      {(!settings?.enableProSmartFilesContextMode ||
        !settings?.enableDyadPro) && (
        <div className="text-xs text-center text-muted-foreground mt-2">

View File

@@ -77,6 +77,8 @@ export const messages = sqliteTable("messages", {
  // The commit hash of the codebase at the time the message was sent
  commitHash: text("commit_hash"),
  requestId: text("request_id"),
+  // Max tokens used for this message (only for assistant messages)
+  maxTokensUsed: integer("max_tokens_used"),
  createdAt: integer("created_at", { mode: "timestamp" })
    .notNull()
    .default(sql`(unixepoch())`),

View File

@@ -1,43 +1,62 @@
-import { useCallback } from "react";
-import { atom, useAtom } from "jotai";
+import {
+  keepPreviousData,
+  useQuery,
+  useQueryClient,
+} from "@tanstack/react-query";
import { IpcClient } from "@/ipc/ipc_client";
import type { TokenCountResult } from "@/ipc/ipc_types";
+import { useCallback, useEffect, useState } from "react";

-// Create atoms to store the token count state
-export const tokenCountResultAtom = atom<TokenCountResult | null>(null);
-export const tokenCountLoadingAtom = atom<boolean>(false);
-export const tokenCountErrorAtom = atom<Error | null>(null);
+export const TOKEN_COUNT_QUERY_KEY = ["tokenCount"] as const;

-export function useCountTokens() {
-  const [result, setResult] = useAtom(tokenCountResultAtom);
-  const [loading, setLoading] = useAtom(tokenCountLoadingAtom);
-  const [error, setError] = useAtom(tokenCountErrorAtom);
-
-  const countTokens = useCallback(
-    async (chatId: number, input: string) => {
-      setLoading(true);
-      setError(null);
-      try {
-        const ipcClient = IpcClient.getInstance();
-        const tokenResult = await ipcClient.countTokens({ chatId, input });
-        setResult(tokenResult);
-        return tokenResult;
-      } catch (error) {
-        console.error("Error counting tokens:", error);
-        setError(error instanceof Error ? error : new Error(String(error)));
-        throw error;
-      } finally {
-        setLoading(false);
-      }
-    },
-    [setLoading, setError, setResult],
-  );
+export function useCountTokens(chatId: number | null, input: string = "") {
+  const queryClient = useQueryClient();
+
+  // Debounce input so we don't call the token counting IPC on every keystroke.
+  const [debouncedInput, setDebouncedInput] = useState(input);
+
+  useEffect(() => {
+    // If there's no chat, don't bother debouncing
+    if (chatId === null) {
+      setDebouncedInput(input);
+      return;
+    }
+
+    const handle = setTimeout(() => {
+      setDebouncedInput(input);
+    }, 1_000);
+
+    return () => clearTimeout(handle);
+  }, [chatId, input]);
+
+  const {
+    data: result = null,
+    isLoading: loading,
+    error,
+    refetch,
+  } = useQuery<TokenCountResult | null>({
+    queryKey: [...TOKEN_COUNT_QUERY_KEY, chatId, debouncedInput],
+    queryFn: async () => {
+      if (chatId === null) return null;
+      return IpcClient.getInstance().countTokens({
+        chatId,
+        input: debouncedInput,
+      });
+    },
+    placeholderData: keepPreviousData,
+    enabled: chatId !== null,
+  });
+
+  // For imperative invalidation (e.g., after streaming completes)
+  const invalidateTokenCount = useCallback(() => {
+    queryClient.invalidateQueries({ queryKey: TOKEN_COUNT_QUERY_KEY });
+  }, [queryClient]);

  return {
-    countTokens,
    result,
    loading,
    error,
+    refetch,
+    invalidateTokenCount,
  };
}

View File

@@ -49,7 +49,6 @@ export function useStreamChat({
  const setStreamCountById = useSetAtom(chatStreamCountByIdAtom);
  const { refreshVersions } = useVersions(selectedAppId);
  const { refreshAppIframe } = useRunApp();
-  const { countTokens } = useCountTokens();
  const { refetchUserBudget } = useUserBudgetInfo();
  const { checkProblems } = useCheckProblems(selectedAppId);
  const { settings } = useSettings();
@@ -62,6 +61,7 @@ export function useStreamChat({
    chatId = id;
  }
  let { refreshProposal } = hasChatId ? useProposal(chatId) : useProposal();
+  const { invalidateTokenCount } = useCountTokens(chatId ?? null, "");

  const streamMessage = useCallback(
    async ({
@@ -154,7 +154,7 @@ export function useStreamChat({
            refreshChats();
            refreshApp();
            refreshVersions();
-            countTokens(chatId, "");
+            invalidateTokenCount();
            onSettled?.();
          },
          onError: (errorMessage: string) => {
@@ -174,7 +174,7 @@ export function useStreamChat({
            refreshChats();
            refreshApp();
            refreshVersions();
-            countTokens(chatId, "");
+            invalidateTokenCount();
            onSettled?.();
          },
        });

View File

@@ -448,6 +448,7 @@ ${componentSnippet}
  });

  let fullResponse = "";
+  let maxTokensUsed: number | undefined;

  // Check if this is a test prompt
  const testResponse = getTestResponse(req.prompt);
@@ -885,7 +886,7 @@ This conversation includes one or more image attachments. When the user uploads
      } satisfies GoogleGenerativeAIProviderOptions;
    }

-    return streamText({
+    const streamResult = streamText({
      headers: isAnthropic
        ? {
            "anthropic-beta": "context-1m-2025-08-07",
@@ -900,6 +901,33 @@ This conversation includes one or more image attachments. When the user uploads
      system: systemPromptOverride,
      tools,
      messages: chatMessages.filter((m) => m.content),
+      onFinish: (response) => {
+        const totalTokens = response.usage?.totalTokens;
+        if (typeof totalTokens === "number") {
+          // We use the highest total tokens used (we are *not* accumulating)
+          // since we're trying to figure out if we're near the context limit.
+          maxTokensUsed = Math.max(maxTokensUsed ?? 0, totalTokens);
+          // Persist the aggregated token usage on the placeholder assistant message
+          void db
+            .update(messages)
+            .set({ maxTokensUsed: maxTokensUsed })
+            .where(eq(messages.id, placeholderAssistantMessage.id))
+            .catch((error) => {
+              logger.error(
+                "Failed to save total tokens for assistant message",
+                error,
+              );
+            });
+          logger.log(
+            `Total tokens used (aggregated for message ${placeholderAssistantMessage.id}): ${maxTokensUsed}`,
+          );
+        } else {
+          logger.log("Total tokens used: unknown");
+        }
+      },
      onError: (error: any) => {
        let errorMessage = (error as any)?.error?.message;
        const responseBody = error?.error?.responseBody;
@@ -923,6 +951,10 @@ This conversation includes one or more image attachments. When the user uploads
      },
      abortSignal: abortController.signal,
    });
+    return {
+      fullStream: streamResult.fullStream,
+      usage: streamResult.usage,
+    };
  };

  let lastDbSaveAt = 0;

View File

@@ -141,8 +141,15 @@ export function registerTokenCountHandlers() {
      codebaseTokens +
      mentionedAppsTokens;

+    // Find the last assistant message since totalTokens is only set on assistant messages
+    const lastAssistantMessage = [...chat.messages]
+      .reverse()
+      .find((m) => m.role === "assistant");
+    const actualMaxTokens = lastAssistantMessage?.maxTokensUsed ?? null;
+
    return {
-      totalTokens,
+      estimatedTotalTokens: totalTokens,
+      actualMaxTokens,
      messageHistoryTokens,
      codebaseTokens,
      mentionedAppsTokens,

View File

@@ -49,6 +49,8 @@ export interface ChatResponseEnd {
  updatedFiles: boolean;
  extraFiles?: string[];
  extraFilesError?: string;
+  totalTokens?: number;
+  contextWindow?: number;
}

export interface ChatProblemsEvent {
@@ -81,6 +83,7 @@ export interface Message {
  dbTimestamp?: string | null;
  createdAt?: Date | string;
  requestId?: string | null;
+  totalTokens?: number | null;
}

export interface Chat {
@@ -167,7 +170,8 @@ export interface TokenCountParams {
}

export interface TokenCountResult {
-  totalTokens: number;
+  estimatedTotalTokens: number;
+  actualMaxTokens: number | null;
  messageHistoryTokens: number;
  codebaseTokens: number;
  mentionedAppsTokens: number;

View File

@@ -306,11 +306,9 @@ export const MODEL_OPTIONS: Record<string, ModelOption[]> = {
displayName: "Auto", displayName: "Auto",
description: "Automatically selects the best model", description: "Automatically selects the best model",
tag: "Default", tag: "Default",
// These are below Gemini 2.5 Pro & Flash limits // The following is reasonable defaults.
// which are the ones defaulted to for both regular auto
// and smart auto.
maxOutputTokens: 32_000, maxOutputTokens: 32_000,
contextWindow: 1_000_000, contextWindow: 200_000,
temperature: 0, temperature: 0,
}, },
{ {

View File

@@ -1,8 +1,42 @@
export const SUMMARIZE_CHAT_SYSTEM_PROMPT = `
-You are a helpful assistant that understands long conversations and can summarize them in a few bullet points.
+You are a helpful assistant that summarizes AI coding chat sessions with a focus on technical changes and file modifications.

-I want you to write down the gist of the conversation in a few bullet points, focusing on the major changes, particularly
-at the end of the conversation.
+Your task is to analyze the conversation and provide:

-Use <dyad-chat-summary> for setting the chat summary (put this at the end). The chat summary should be less than a sentence, but more than a few words. YOU SHOULD ALWAYS INCLUDE EXACTLY ONE CHAT TITLE
+1. **Chat Summary**: A concise summary (less than a sentence, more than a few words) that captures the primary objective or outcome of the session.
+
+2. **Major Changes**: Identify and highlight:
+   - Major code modifications, refactors, or new features implemented
+   - Critical bug fixes or debugging sessions
+   - Architecture or design pattern changes
+   - Important decisions made during the conversation
+
+3. **Relevant Files**: List the most important files discussed or modified, with brief context:
+   - Files that received significant changes
+   - New files created
+   - Files central to the discussion or problem-solving
+   - Format: \`path/to/file.ext - brief description of changes\`
+
+4. **Focus on Recency**: Prioritize changes and discussions from the latter part of the conversation, as these typically represent the final state or most recent decisions.
+
+**Output Format:**
+
+## Major Changes
+- Bullet point of significant change 1
+- Bullet point of significant change 2
+
+## Important Context
+- Any critical decisions, trade-offs, or next steps discussed
+
+## Relevant Files
+- \`file1.ts\` - Description of changes
+- \`file2.py\` - Description of changes
+
+<dyad-chat-summary>
+[Your concise summary here - less than a sentence, more than a few words]
+</dyad-chat-summary>
+
+**Reminder:**
+YOU MUST ALWAYS INCLUDE EXACTLY ONE <dyad-chat-summary> TAG AT THE END.
`;

View File

@@ -371,6 +371,15 @@ export default Index;
        return;
      }

+      // Check for high token usage marker to simulate near context limit
+      const highTokensMatch =
+        typeof lastMessage?.content === "string" &&
+        !lastMessage?.content.startsWith("Summarize the following chat:") &&
+        lastMessage?.content?.match?.(/\[high-tokens=(\d+)\]/);
+      const highTokensValue = highTokensMatch
+        ? parseInt(highTokensMatch[1], 10)
+        : null;
+
      // Split the message into characters to simulate streaming
      const messageChars = messageContent.split("");
@@ -388,8 +397,15 @@ export default Index;
          res.write(createStreamChunk(batch));
          index += batchSize;
        } else {
-          // Send the final chunk
-          res.write(createStreamChunk("", "assistant", true));
+          // Send the final chunk with optional usage info for high token simulation
+          const usage = highTokensValue
+            ? {
+                prompt_tokens: highTokensValue - 100,
+                completion_tokens: 100,
+                total_tokens: highTokensValue,
+              }
+            : undefined;
+          res.write(createStreamChunk("", "assistant", true, usage));
          clearInterval(interval);
          res.end();
        }

View File

@@ -29,8 +29,13 @@ export function createStreamChunk(
  content: string,
  role: string = "assistant",
  isLast: boolean = false,
+  usage?: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  },
) {
-  const chunk = {
+  const chunk: any = {
    id: `chatcmpl-${Date.now()}`,
    object: "chat.completion.chunk",
    created: Math.floor(Date.now() / 1000),
@@ -44,6 +49,11 @@ export function createStreamChunk(
    ],
  };

+  // Add usage info to the final chunk if provided
+  if (isLast && usage) {
+    chunk.usage = usage;
+  }
+
  return `data: ${JSON.stringify(chunk)}\n\n${isLast ? "data: [DONE]\n\n" : ""}`;
}
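
For reference, with [high-tokens=110000] the final SSE chunk emitted by createStreamChunk would look roughly like this on the wire (the choices delta shape is assumed from the standard OpenAI streaming format, since that part of the file is elided above):

data: {"id":"chatcmpl-1764912000000","object":"chat.completion.chunk","created":1764912000,"choices":[{"index":0,"delta":{"content":"","role":"assistant"},"finish_reason":"stop"}],"usage":{"prompt_tokens":109900,"completion_tokens":100,"total_tokens":110000}}

data: [DONE]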
} }