Summarize chat trigger (#1890)

<!-- CURSOR_SUMMARY -->
> [!NOTE]
> Adds a context-limit banner with one-click “summarize into new chat,” refactors token counting with react-query, and persists per-message max token usage.
>
> - **Chat UX**
>   - **Context limit banner** (`ContextLimitBanner.tsx`, `MessagesList.tsx`): shown when within 40k tokens of `contextWindow`, with a tooltip and an action to summarize into a new chat.
>   - **Summarize flow**: extracted to `useSummarizeInNewChat` and used in the chat input and the banner; new summarize system prompt (`summarize_chat_system_prompt.ts`).
> - **Token usage & counting**
>   - **Persist max tokens used per assistant message**: DB migration (`messages.max_tokens_used`), schema updates, and saving usage during streaming (`chat_stream_handlers.ts`).
>   - **Token counting refactor** (`useCountTokens.ts`): react-query with debounce; returns `estimatedTotalTokens` and `actualMaxTokens`; invalidated on model change and at stream end; `TokenBar` updated.
>   - **Surfacing usage**: tooltip on the latest assistant message shows total tokens (`ChatMessage.tsx`).
> - **Model/config tweaks**
>   - Set the `auto` model `contextWindow` to `200_000` (`language_model_constants.ts`).
>   - Improve the chat auto-scroll dependency array (`ChatPanel.tsx`).
>   - Fix the app path validation regex (`app_handlers.ts`).
> - **Testing & dev server**
>   - E2E tests for the banner and summarize flow (`e2e-tests/context_limit_banner.spec.ts`, plus fixtures and a snapshot).
>   - Fake LLM server streams usage to simulate high-token scenarios (`testing/fake-llm-server/*`).
>
> <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 2ae16a14d50699cc772407426419192c2fdf2ec3. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->

<!-- This is an auto-generated description by cubic. -->
---
## Summary by cubic

Adds a “Summarize into new chat” trigger and a context limit banner to help keep conversations focused and avoid hitting model limits. Also tracks and surfaces actual token usage per assistant message, with a token-counting refactor for reliability.

- **New Features**
  - Summarize into a new chat from the input or the banner; improved system prompt with a clear output format.
  - Context limit banner appears when within 40k tokens of the model’s context window and offers a one-click summarize action.
  - Tooltip on the latest assistant message shows total tokens used.

- **Refactors**
  - Token counting now uses react-query and returns `estimatedTotalTokens` and `actualMaxTokens`; counts are invalidated on model change and when streaming settles.
  - Persist per-message `max_tokens_used` in the messages table; the backend aggregates model usage during streaming and saves it (see the schema sketch after this list).
  - Adjusted the default “Auto” model `contextWindow` to 200k for more realistic limits.
  - Improved chat scrolling while streaming; fixed the app path validation regex.
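
Neither summary shows the actual schema change or backend aggregation. A minimal sketch of the schema side, assuming a Drizzle ORM SQLite schema (only the `max_tokens_used` column on `messages` is confirmed by this PR; every other column here is illustrative):

```ts
// schema.ts — hypothetical sketch; only max_tokens_used is confirmed by the PR summaries.
import { sqliteTable, integer, text } from "drizzle-orm/sqlite-core";

export const messages = sqliteTable("messages", {
  id: integer("id").primaryKey({ autoIncrement: true }),
  role: text("role").notNull(),
  content: text("content").notNull(),
  // New: peak token usage the model reported while streaming this message.
  // Nullable so existing rows and user messages remain valid without backfill.
  maxTokensUsed: integer("max_tokens_used"),
});
```

The corresponding migration would then be a single additive statement along the lines of `ALTER TABLE messages ADD COLUMN max_tokens_used integer;`, which stays backward-compatible with existing rows.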

<sup>Written for commit 2ae16a14d50699cc772407426419192c2fdf2ec3. Summary will update automatically on new commits.</sup>

<!-- End of auto-generated description by cubic. -->
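
The Cursor summary above also notes that the fake LLM server now streams usage to simulate high-token scenarios. A minimal sketch of what that could look like, assuming an OpenAI-compatible streaming endpoint (field names follow that spec; the actual code in `testing/fake-llm-server/*` is not shown on this page):

```ts
// Hypothetical fake-LLM SSE handler: the final chunk carries inflated usage so the
// client records a max-tokens-used value near the context limit. Port is arbitrary.
import { createServer } from "node:http";

createServer((_req, res) => {
  res.writeHead(200, { "Content-Type": "text/event-stream" });
  const finalChunk = {
    object: "chat.completion.chunk",
    choices: [{ index: 0, delta: { content: "ok" }, finish_reason: "stop" }],
    // Large totals simulate a chat that is close to the context window.
    usage: { prompt_tokens: 180_000, completion_tokens: 500, total_tokens: 180_500 },
  };
  res.write(`data: ${JSON.stringify(finalChunk)}\n\n`);
  res.write("data: [DONE]\n\n");
  res.end();
}).listen(3500);
```
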
Commit 6235f7bb9d (parent 90c5805b57) by Will Chen, 2025-12-04 23:00:28 -08:00, committed by GitHub.
24 changed files with 1185 additions and 91 deletions.

**ChatPanel.tsx**

```diff
@@ -97,7 +97,11 @@ export function ChatPanel({
     const streamCount = chatId ? (streamCountById.get(chatId) ?? 0) : 0;
     console.log("streamCount - scrolling to bottom", streamCount);
     scrollToBottom();
-  }, [chatId, chatId ? (streamCountById.get(chatId) ?? 0) : 0]);
+  }, [
+    chatId,
+    chatId ? (streamCountById.get(chatId) ?? 0) : 0,
+    chatId ? (isStreamingById.get(chatId) ?? false) : false,
+  ]);
 
   useEffect(() => {
     const container = messagesContainerRef.current;
```

**ModelPicker.tsx**

```diff
@@ -27,11 +27,17 @@ import { useSettings } from "@/hooks/useSettings";
 import { PriceBadge } from "@/components/PriceBadge";
 import { TURBO_MODELS } from "@/ipc/shared/language_model_constants";
 import { cn } from "@/lib/utils";
+import { useQueryClient } from "@tanstack/react-query";
+import { TOKEN_COUNT_QUERY_KEY } from "@/hooks/useCountTokens";
 
 export function ModelPicker() {
   const { settings, updateSettings } = useSettings();
+  const queryClient = useQueryClient();
   const onModelSelect = (model: LargeLanguageModel) => {
     updateSettings({ selectedModel: model });
+    // Invalidate token count when model changes since different models have different context windows
+    // (technically they have different tokenizers, but we don't keep track of that).
+    queryClient.invalidateQueries({ queryKey: TOKEN_COUNT_QUERY_KEY });
   };
 
   const [open, setOpen] = useState(false);
```
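
For this invalidation to catch every per-chat count, `TOKEN_COUNT_QUERY_KEY` presumably acts as the common prefix of each token-count query key. Only the export name is confirmed by the import above; the value and the prefix convention are assumptions (a fuller hook sketch appears at the end of this page):

```ts
// useCountTokens.ts (assumed shape): a stable prefix shared by all token-count queries.
export const TOKEN_COUNT_QUERY_KEY = ["countTokens"] as const;

// Individual queries extend the prefix, e.g. ["countTokens", chatId, input], so
// queryClient.invalidateQueries({ queryKey: TOKEN_COUNT_QUERY_KEY }) matches all of them.
```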

**ChatInput.tsx**

```diff
@@ -52,13 +52,14 @@ import {
   TooltipProvider,
   TooltipTrigger,
 } from "../ui/tooltip";
-import { useNavigate } from "@tanstack/react-router";
 import { useVersions } from "@/hooks/useVersions";
 import { useAttachments } from "@/hooks/useAttachments";
 import { AttachmentsList } from "./AttachmentsList";
 import { DragDropOverlay } from "./DragDropOverlay";
 import { FileAttachmentDropdown } from "./FileAttachmentDropdown";
-import { showError, showExtraFilesToast } from "@/lib/toast";
+import { showExtraFilesToast } from "@/lib/toast";
+import { useSummarizeInNewChat } from "./SummarizeInNewChatButton";
 import { ChatInputControls } from "../ChatInputControls";
 import { ChatErrorBox } from "./ChatErrorBox";
 import {
@@ -419,30 +420,10 @@
 }
 
 function SummarizeInNewChatButton() {
-  const chatId = useAtomValue(selectedChatIdAtom);
-  const appId = useAtomValue(selectedAppIdAtom);
-  const { streamMessage } = useStreamChat();
-  const navigate = useNavigate();
-  const onClick = async () => {
-    if (!appId) {
-      console.error("No app id found");
-      return;
-    }
-    try {
-      const newChatId = await IpcClient.getInstance().createChat(appId);
-      // navigate to new chat
-      await navigate({ to: "/chat", search: { id: newChatId } });
-      await streamMessage({
-        prompt: "Summarize from chat-id=" + chatId,
-        chatId: newChatId,
-      });
-    } catch (err) {
-      showError(err);
-    }
-  };
+  const { handleSummarize } = useSummarizeInNewChat();
   return (
     <SuggestionButton
-      onClick={onClick}
+      onClick={handleSummarize}
       tooltipText="Creating a new chat makes the AI more focused and efficient"
     >
       Summarize to new chat
```

**ChatMessage.tsx**

```diff
@@ -12,6 +12,7 @@ import {
   GitCommit,
   Copy,
   Check,
+  Info,
 } from "lucide-react";
 import { formatDistanceToNow, format } from "date-fns";
 import { useVersions } from "@/hooks/useVersions";
@@ -277,6 +278,20 @@ const ChatMessage = ({ message, isLastMessage }: ChatMessageProps) => {
            </Tooltip>
          </TooltipProvider>
        )}
+        {isLastMessage && message.totalTokens && (
+          <TooltipProvider>
+            <Tooltip>
+              <TooltipTrigger asChild>
+                <div className="flex items-center space-x-1 px-1 py-0.5">
+                  <Info className="h-3 w-3" />
+                </div>
+              </TooltipTrigger>
+              <TooltipContent>
+                Max tokens used: {message.totalTokens.toLocaleString()}
+              </TooltipContent>
+            </Tooltip>
+          </TooltipProvider>
+        )}
      </div>
    )}
  </div>
```

**ContextLimitBanner.tsx** (new file)

```diff
@@ -0,0 +1,89 @@
+import { AlertTriangle, ArrowRight } from "lucide-react";
+import { Button } from "@/components/ui/button";
+import {
+  Tooltip,
+  TooltipContent,
+  TooltipTrigger,
+} from "@/components/ui/tooltip";
+import { useSummarizeInNewChat } from "./SummarizeInNewChatButton";
+
+const CONTEXT_LIMIT_THRESHOLD = 40_000;
+
+interface ContextLimitBannerProps {
+  totalTokens?: number | null;
+  contextWindow?: number;
+}
+
+function formatTokenCount(count: number): string {
+  if (count >= 1000) {
+    return `${(count / 1000).toFixed(1)}k`.replace(".0k", "k");
+  }
+  return count.toString();
+}
+
+export function ContextLimitBanner({
+  totalTokens,
+  contextWindow,
+}: ContextLimitBannerProps) {
+  const { handleSummarize } = useSummarizeInNewChat();
+
+  // Don't show banner if we don't have the necessary data
+  if (!totalTokens || !contextWindow) {
+    return null;
+  }
+
+  // Check if we're within 40k tokens of the context limit
+  const tokensRemaining = contextWindow - totalTokens;
+  if (tokensRemaining > CONTEXT_LIMIT_THRESHOLD) {
+    return null;
+  }
+
+  return (
+    <div
+      className="mx-auto max-w-3xl my-3 p-2 rounded-lg border border-amber-500/30 bg-amber-500/10 flex flex-col gap-2"
+      data-testid="context-limit-banner"
+    >
+      <div className="flex items-center gap-2 text-amber-600 dark:text-amber-400">
+        <Tooltip>
+          <TooltipTrigger asChild>
+            <Button
+              variant="ghost"
+              size="icon"
+              className="h-5 w-5 p-0 hover:bg-transparent text-amber-600 dark:text-amber-400 cursor-help"
+            >
+              <AlertTriangle className="h-4 w-4 shrink-0" />
+            </Button>
+          </TooltipTrigger>
+          <TooltipContent className="w-auto p-2 text-xs" side="top">
+            <div className="grid gap-1">
+              <div className="flex justify-between gap-4">
+                <span>Used:</span>
+                <span className="font-medium">
+                  {formatTokenCount(totalTokens)}
+                </span>
+              </div>
+              <div className="flex justify-between gap-4">
+                <span>Limit:</span>
+                <span className="font-medium">
+                  {formatTokenCount(contextWindow)}
+                </span>
+              </div>
+            </div>
+          </TooltipContent>
+        </Tooltip>
+        <p className="text-sm font-medium">
+          You're close to the context limit for this chat.
+        </p>
+      </div>
+      <Button
+        onClick={handleSummarize}
+        variant="outline"
+        size="sm"
+        className="h-8 border-amber-500/50 hover:bg-amber-500/20 hover:border-amber-500 text-amber-600 dark:text-amber-400"
+      >
+        Summarize into new chat
+        <ArrowRight className="h-3 w-3 ml-2" />
+      </Button>
+    </div>
+  );
+}
```
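
Both summaries mention e2e coverage in `e2e-tests/context_limit_banner.spec.ts`, and the `data-testid="context-limit-banner"` attribute above is the natural hook for it. A minimal Playwright-style sketch (Playwright is assumed from the `*.spec.ts` naming; the URL and setup are placeholders, and the real suite may drive the app differently):

```ts
import { test, expect } from "@playwright/test";

test("shows context limit banner near the context window", async ({ page }) => {
  // Assumes the app is running against the fake LLM server that streams high usage.
  await page.goto("http://localhost:5173/chat?id=1");
  const banner = page.getByTestId("context-limit-banner");
  await expect(banner).toBeVisible();

  // One-click summarize should create a new chat and navigate to it.
  await banner.getByRole("button", { name: "Summarize into new chat" }).click();
  await expect(page).toHaveURL(/\/chat\?id=\d+/);
});
```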

**MessagesList.tsx**

```diff
@@ -18,6 +18,8 @@ import { useLanguageModelProviders } from "@/hooks/useLanguageModelProviders";
 import { useSettings } from "@/hooks/useSettings";
 import { useUserBudgetInfo } from "@/hooks/useUserBudgetInfo";
 import { PromoMessage } from "./PromoMessage";
+import { ContextLimitBanner } from "./ContextLimitBanner";
+import { useCountTokens } from "@/hooks/useCountTokens";
 
 interface MessagesListProps {
   messages: Message[];
@@ -36,6 +38,11 @@ export const MessagesList = forwardRef<HTMLDivElement, MessagesListProps>(
   const [isRetryLoading, setIsRetryLoading] = useState(false);
   const selectedChatId = useAtomValue(selectedChatIdAtom);
   const { userBudget } = useUserBudgetInfo();
+  // Only fetch token count when not streaming
+  const { result: tokenCountResult } = useCountTokens(
+    !isStreaming ? selectedChatId : null,
+    "",
+  );
 
   const renderSetupBanner = () => {
     const selectedModel = settings?.selectedModel;
@@ -73,6 +80,13 @@
            </div>
          </div>
        )}
+        {/* Show context limit banner when close to token limit */}
+        {!isStreaming && tokenCountResult && (
+          <ContextLimitBanner
+            totalTokens={tokenCountResult.actualMaxTokens}
+            contextWindow={tokenCountResult.contextWindow}
+          />
+        )}
        {!isStreaming && (
          <div className="flex max-w-3xl mx-auto gap-2">
            {!!messages.length &&
```

**SummarizeInNewChatButton.tsx** (new file)

```diff
@@ -0,0 +1,38 @@
+import { useNavigate } from "@tanstack/react-router";
+import { useAtomValue } from "jotai";
+import { selectedChatIdAtom } from "@/atoms/chatAtoms";
+import { selectedAppIdAtom } from "@/atoms/appAtoms";
+import { useStreamChat } from "@/hooks/useStreamChat";
+import { IpcClient } from "@/ipc/ipc_client";
+import { showError } from "@/lib/toast";
+
+export function useSummarizeInNewChat() {
+  const chatId = useAtomValue(selectedChatIdAtom);
+  const appId = useAtomValue(selectedAppIdAtom);
+  const { streamMessage } = useStreamChat();
+  const navigate = useNavigate();
+
+  const handleSummarize = async () => {
+    if (!appId) {
+      console.error("No app id found");
+      return;
+    }
+    if (!chatId) {
+      console.error("No chat id found");
+      return;
+    }
+    try {
+      const newChatId = await IpcClient.getInstance().createChat(appId);
+      // navigate to new chat
+      await navigate({ to: "/chat", search: { id: newChatId } });
+      await streamMessage({
+        prompt: "Summarize from chat-id=" + chatId,
+        chatId: newChatId,
+      });
+    } catch (err) {
+      showError(err);
+    }
+  };
+
+  return { handleSummarize };
+}
```

**TokenBar.tsx**

```diff
@@ -1,4 +1,4 @@
-import React, { useEffect, useState } from "react";
+import React from "react";
 import {
   Tooltip,
   TooltipContent,
@@ -24,31 +24,15 @@ interface TokenBarProps {
 export function TokenBar({ chatId }: TokenBarProps) {
   const [inputValue] = useAtom(chatInputValueAtom);
-  const { countTokens, result } = useCountTokens();
-  const [error, setError] = useState<string | null>(null);
   const { settings } = useSettings();
-
-  useEffect(() => {
-    if (!chatId) return;
-    // Mark this as used, we need to re-trigger token count
-    // when selected model changes.
-    void settings?.selectedModel;
-    const debounceTimer = setTimeout(() => {
-      countTokens(chatId, inputValue).catch((err) => {
-        setError("Failed to count tokens");
-        console.error("Token counting error:", err);
-      });
-    }, 500);
-    return () => clearTimeout(debounceTimer);
-  }, [chatId, inputValue, countTokens, settings?.selectedModel]);
+  const { result, error } = useCountTokens(chatId ?? null, inputValue);
 
   if (!chatId || !result) {
     return null;
   }
 
   const {
-    totalTokens,
+    estimatedTotalTokens: totalTokens,
     messageHistoryTokens,
    codebaseTokens,
    mentionedAppsTokens,
@@ -142,7 +126,9 @@ export function TokenBar({ chatId }: TokenBarProps) {
          </TooltipContent>
        </Tooltip>
      </TooltipProvider>
-      {error && <div className="text-red-500 text-xs mt-1">{error}</div>}
+      {error && (
+        <div className="text-red-500 text-xs mt-1">Failed to count tokens</div>
+      )}
      {(!settings?.enableProSmartFilesContextMode ||
        !settings?.enableDyadPro) && (
        <div className="text-xs text-center text-muted-foreground mt-2">
```