Summarize chat trigger (#1890)
<!-- CURSOR_SUMMARY --> > [!NOTE] > Adds a context-limit banner with one-click “summarize into new chat,” refactors token counting with react-query, and persists per-message max token usage. > > - **Chat UX** > - **Context limit banner** (`ContextLimitBanner.tsx`, `MessagesList.tsx`): shows when within 40k tokens of `contextWindow`, with tooltip and action to summarize into a new chat. > - **Summarize flow**: extracted to `useSummarizeInNewChat` and used in chat input and banner; new summarize system prompt (`summarize_chat_system_prompt.ts`). > - **Token usage & counting** > - **Persist max tokens used per assistant message**: DB migration (`messages.max_tokens_used`), schema updates, and saving usage during streaming (`chat_stream_handlers.ts`). > - **Token counting refactor** (`useCountTokens.ts`): react-query with debounce; returns `estimatedTotalTokens` and `actualMaxTokens`; invalidated on model change and stream end; `TokenBar` updated. > - **Surfacing usage**: tooltip on latest assistant message shows total tokens (`ChatMessage.tsx`). > - **Model/config tweaks** > - Set `auto` model `contextWindow` to `200_000` (`language_model_constants.ts`). > - Improve chat auto-scroll dependency (`ChatPanel.tsx`). > - Fix app path validation regex (`app_handlers.ts`). > - **Testing & dev server** > - E2E tests for banner and summarize (`e2e-tests/context_limit_banner.spec.ts` + fixtures/snapshot). > - Fake LLM server streams usage to simulate high token scenarios (`testing/fake-llm-server/*`). > > <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 2ae16a14d50699cc772407426419192c2fdf2ec3. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup> <!-- /CURSOR_SUMMARY --> <!-- This is an auto-generated description by cubic. --> --- ## Summary by cubic Adds a “Summarize into new chat” trigger and a context limit banner to help keep conversations focused and avoid hitting model limits. 
Also tracks and surfaces actual token usage per assistant message, with a token counting refactor for reliability. - **New Features** - Summarize into new chat from the input or banner; improved system prompt with clear output format. - Context limit banner shows when within 40k tokens of the model’s context window and offers a one-click summarize action. - Tooltip on the latest assistant message shows total tokens used. - **Refactors** - Token counting now uses react-query and returns estimatedTotalTokens and actualMaxTokens; counts are invalidated on model change and when streaming settles. - Persist per-message max_tokens_used in the messages table; backend aggregates model usage during streaming and saves it. - Adjusted default “Auto” model contextWindow to 200k for more realistic limits. - Improved chat scrolling while streaming; fixed app path validation regex. <sup>Written for commit 2ae16a14d50699cc772407426419192c2fdf2ec3. Summary will update automatically on new commits.</sup> <!-- End of auto-generated description by cubic. -->
This commit is contained in:
@@ -97,7 +97,11 @@ export function ChatPanel({
|
||||
const streamCount = chatId ? (streamCountById.get(chatId) ?? 0) : 0;
|
||||
console.log("streamCount - scrolling to bottom", streamCount);
|
||||
scrollToBottom();
|
||||
}, [chatId, chatId ? (streamCountById.get(chatId) ?? 0) : 0]);
|
||||
}, [
|
||||
chatId,
|
||||
chatId ? (streamCountById.get(chatId) ?? 0) : 0,
|
||||
chatId ? (isStreamingById.get(chatId) ?? false) : false,
|
||||
]);
|
||||
|
||||
useEffect(() => {
|
||||
const container = messagesContainerRef.current;
|
||||
|
||||
@@ -27,11 +27,17 @@ import { useSettings } from "@/hooks/useSettings";
|
||||
import { PriceBadge } from "@/components/PriceBadge";
|
||||
import { TURBO_MODELS } from "@/ipc/shared/language_model_constants";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { useQueryClient } from "@tanstack/react-query";
|
||||
import { TOKEN_COUNT_QUERY_KEY } from "@/hooks/useCountTokens";
|
||||
|
||||
export function ModelPicker() {
|
||||
const { settings, updateSettings } = useSettings();
|
||||
const queryClient = useQueryClient();
|
||||
const onModelSelect = (model: LargeLanguageModel) => {
|
||||
updateSettings({ selectedModel: model });
|
||||
// Invalidate token count when model changes since different models have different context windows
|
||||
// (technically they have different tokenizers, but we don't keep track of that).
|
||||
queryClient.invalidateQueries({ queryKey: TOKEN_COUNT_QUERY_KEY });
|
||||
};
|
||||
|
||||
const [open, setOpen] = useState(false);
|
||||
|
||||
@@ -52,13 +52,14 @@ import {
|
||||
TooltipProvider,
|
||||
TooltipTrigger,
|
||||
} from "../ui/tooltip";
|
||||
import { useNavigate } from "@tanstack/react-router";
|
||||
|
||||
import { useVersions } from "@/hooks/useVersions";
|
||||
import { useAttachments } from "@/hooks/useAttachments";
|
||||
import { AttachmentsList } from "./AttachmentsList";
|
||||
import { DragDropOverlay } from "./DragDropOverlay";
|
||||
import { FileAttachmentDropdown } from "./FileAttachmentDropdown";
|
||||
import { showError, showExtraFilesToast } from "@/lib/toast";
|
||||
import { showExtraFilesToast } from "@/lib/toast";
|
||||
import { useSummarizeInNewChat } from "./SummarizeInNewChatButton";
|
||||
import { ChatInputControls } from "../ChatInputControls";
|
||||
import { ChatErrorBox } from "./ChatErrorBox";
|
||||
import {
|
||||
@@ -419,30 +420,10 @@ function SuggestionButton({
|
||||
}
|
||||
|
||||
function SummarizeInNewChatButton() {
|
||||
const chatId = useAtomValue(selectedChatIdAtom);
|
||||
const appId = useAtomValue(selectedAppIdAtom);
|
||||
const { streamMessage } = useStreamChat();
|
||||
const navigate = useNavigate();
|
||||
const onClick = async () => {
|
||||
if (!appId) {
|
||||
console.error("No app id found");
|
||||
return;
|
||||
}
|
||||
try {
|
||||
const newChatId = await IpcClient.getInstance().createChat(appId);
|
||||
// navigate to new chat
|
||||
await navigate({ to: "/chat", search: { id: newChatId } });
|
||||
await streamMessage({
|
||||
prompt: "Summarize from chat-id=" + chatId,
|
||||
chatId: newChatId,
|
||||
});
|
||||
} catch (err) {
|
||||
showError(err);
|
||||
}
|
||||
};
|
||||
const { handleSummarize } = useSummarizeInNewChat();
|
||||
return (
|
||||
<SuggestionButton
|
||||
onClick={onClick}
|
||||
onClick={handleSummarize}
|
||||
tooltipText="Creating a new chat makes the AI more focused and efficient"
|
||||
>
|
||||
Summarize to new chat
|
||||
|
||||
@@ -12,6 +12,7 @@ import {
|
||||
GitCommit,
|
||||
Copy,
|
||||
Check,
|
||||
Info,
|
||||
} from "lucide-react";
|
||||
import { formatDistanceToNow, format } from "date-fns";
|
||||
import { useVersions } from "@/hooks/useVersions";
|
||||
@@ -277,6 +278,20 @@ const ChatMessage = ({ message, isLastMessage }: ChatMessageProps) => {
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
)}
|
||||
{isLastMessage && message.totalTokens && (
|
||||
<TooltipProvider>
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<div className="flex items-center space-x-1 px-1 py-0.5">
|
||||
<Info className="h-3 w-3" />
|
||||
</div>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent>
|
||||
Max tokens used: {message.totalTokens.toLocaleString()}
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
89
src/components/chat/ContextLimitBanner.tsx
Normal file
89
src/components/chat/ContextLimitBanner.tsx
Normal file
@@ -0,0 +1,89 @@
|
||||
import { AlertTriangle, ArrowRight } from "lucide-react";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import {
|
||||
Tooltip,
|
||||
TooltipContent,
|
||||
TooltipTrigger,
|
||||
} from "@/components/ui/tooltip";
|
||||
import { useSummarizeInNewChat } from "./SummarizeInNewChatButton";
|
||||
|
||||
// Show the banner once fewer than this many tokens remain before the
// model's context window is exhausted.
const CONTEXT_LIMIT_THRESHOLD = 40_000;

interface ContextLimitBannerProps {
  // Token count consumed by the chat so far (caller passes the max observed
  // usage — presumably actualMaxTokens; confirm against MessagesList).
  // null/undefined when no usage data is available yet.
  totalTokens?: number | null;
  // Context window size, in tokens, of the currently selected model;
  // undefined when the model's limit is unknown.
  contextWindow?: number;
}
||||
function formatTokenCount(count: number): string {
|
||||
if (count >= 1000) {
|
||||
return `${(count / 1000).toFixed(1)}k`.replace(".0k", "k");
|
||||
}
|
||||
return count.toString();
|
||||
}
|
||||
|
||||
export function ContextLimitBanner({
|
||||
totalTokens,
|
||||
contextWindow,
|
||||
}: ContextLimitBannerProps) {
|
||||
const { handleSummarize } = useSummarizeInNewChat();
|
||||
|
||||
// Don't show banner if we don't have the necessary data
|
||||
if (!totalTokens || !contextWindow) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Check if we're within 40k tokens of the context limit
|
||||
const tokensRemaining = contextWindow - totalTokens;
|
||||
if (tokensRemaining > CONTEXT_LIMIT_THRESHOLD) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return (
|
||||
<div
|
||||
className="mx-auto max-w-3xl my-3 p-2 rounded-lg border border-amber-500/30 bg-amber-500/10 flex flex-col gap-2"
|
||||
data-testid="context-limit-banner"
|
||||
>
|
||||
<div className="flex items-center gap-2 text-amber-600 dark:text-amber-400">
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon"
|
||||
className="h-5 w-5 p-0 hover:bg-transparent text-amber-600 dark:text-amber-400 cursor-help"
|
||||
>
|
||||
<AlertTriangle className="h-4 w-4 shrink-0" />
|
||||
</Button>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent className="w-auto p-2 text-xs" side="top">
|
||||
<div className="grid gap-1">
|
||||
<div className="flex justify-between gap-4">
|
||||
<span>Used:</span>
|
||||
<span className="font-medium">
|
||||
{formatTokenCount(totalTokens)}
|
||||
</span>
|
||||
</div>
|
||||
<div className="flex justify-between gap-4">
|
||||
<span>Limit:</span>
|
||||
<span className="font-medium">
|
||||
{formatTokenCount(contextWindow)}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
<p className="text-sm font-medium">
|
||||
You're close to the context limit for this chat.
|
||||
</p>
|
||||
</div>
|
||||
<Button
|
||||
onClick={handleSummarize}
|
||||
variant="outline"
|
||||
size="sm"
|
||||
className="h-8 border-amber-500/50 hover:bg-amber-500/20 hover:border-amber-500 text-amber-600 dark:text-amber-400"
|
||||
>
|
||||
Summarize into new chat
|
||||
<ArrowRight className="h-3 w-3 ml-2" />
|
||||
</Button>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -18,6 +18,8 @@ import { useLanguageModelProviders } from "@/hooks/useLanguageModelProviders";
|
||||
import { useSettings } from "@/hooks/useSettings";
|
||||
import { useUserBudgetInfo } from "@/hooks/useUserBudgetInfo";
|
||||
import { PromoMessage } from "./PromoMessage";
|
||||
import { ContextLimitBanner } from "./ContextLimitBanner";
|
||||
import { useCountTokens } from "@/hooks/useCountTokens";
|
||||
|
||||
interface MessagesListProps {
|
||||
messages: Message[];
|
||||
@@ -36,6 +38,11 @@ export const MessagesList = forwardRef<HTMLDivElement, MessagesListProps>(
|
||||
const [isRetryLoading, setIsRetryLoading] = useState(false);
|
||||
const selectedChatId = useAtomValue(selectedChatIdAtom);
|
||||
const { userBudget } = useUserBudgetInfo();
|
||||
// Only fetch token count when not streaming
|
||||
const { result: tokenCountResult } = useCountTokens(
|
||||
!isStreaming ? selectedChatId : null,
|
||||
"",
|
||||
);
|
||||
|
||||
const renderSetupBanner = () => {
|
||||
const selectedModel = settings?.selectedModel;
|
||||
@@ -73,6 +80,13 @@ export const MessagesList = forwardRef<HTMLDivElement, MessagesListProps>(
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
{/* Show context limit banner when close to token limit */}
|
||||
{!isStreaming && tokenCountResult && (
|
||||
<ContextLimitBanner
|
||||
totalTokens={tokenCountResult.actualMaxTokens}
|
||||
contextWindow={tokenCountResult.contextWindow}
|
||||
/>
|
||||
)}
|
||||
{!isStreaming && (
|
||||
<div className="flex max-w-3xl mx-auto gap-2">
|
||||
{!!messages.length &&
|
||||
|
||||
38
src/components/chat/SummarizeInNewChatButton.tsx
Normal file
38
src/components/chat/SummarizeInNewChatButton.tsx
Normal file
@@ -0,0 +1,38 @@
|
||||
import { useNavigate } from "@tanstack/react-router";
|
||||
import { useAtomValue } from "jotai";
|
||||
import { selectedChatIdAtom } from "@/atoms/chatAtoms";
|
||||
import { selectedAppIdAtom } from "@/atoms/appAtoms";
|
||||
import { useStreamChat } from "@/hooks/useStreamChat";
|
||||
import { IpcClient } from "@/ipc/ipc_client";
|
||||
import { showError } from "@/lib/toast";
|
||||
|
||||
export function useSummarizeInNewChat() {
|
||||
const chatId = useAtomValue(selectedChatIdAtom);
|
||||
const appId = useAtomValue(selectedAppIdAtom);
|
||||
const { streamMessage } = useStreamChat();
|
||||
const navigate = useNavigate();
|
||||
|
||||
const handleSummarize = async () => {
|
||||
if (!appId) {
|
||||
console.error("No app id found");
|
||||
return;
|
||||
}
|
||||
if (!chatId) {
|
||||
console.error("No chat id found");
|
||||
return;
|
||||
}
|
||||
try {
|
||||
const newChatId = await IpcClient.getInstance().createChat(appId);
|
||||
// navigate to new chat
|
||||
await navigate({ to: "/chat", search: { id: newChatId } });
|
||||
await streamMessage({
|
||||
prompt: "Summarize from chat-id=" + chatId,
|
||||
chatId: newChatId,
|
||||
});
|
||||
} catch (err) {
|
||||
showError(err);
|
||||
}
|
||||
};
|
||||
|
||||
return { handleSummarize };
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
import React, { useEffect, useState } from "react";
|
||||
import React from "react";
|
||||
import {
|
||||
Tooltip,
|
||||
TooltipContent,
|
||||
@@ -24,31 +24,15 @@ interface TokenBarProps {
|
||||
|
||||
export function TokenBar({ chatId }: TokenBarProps) {
|
||||
const [inputValue] = useAtom(chatInputValueAtom);
|
||||
const { countTokens, result } = useCountTokens();
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const { settings } = useSettings();
|
||||
useEffect(() => {
|
||||
if (!chatId) return;
|
||||
// Mark this as used, we need to re-trigger token count
|
||||
// when selected model changes.
|
||||
void settings?.selectedModel;
|
||||
|
||||
const debounceTimer = setTimeout(() => {
|
||||
countTokens(chatId, inputValue).catch((err) => {
|
||||
setError("Failed to count tokens");
|
||||
console.error("Token counting error:", err);
|
||||
});
|
||||
}, 500);
|
||||
|
||||
return () => clearTimeout(debounceTimer);
|
||||
}, [chatId, inputValue, countTokens, settings?.selectedModel]);
|
||||
const { result, error } = useCountTokens(chatId ?? null, inputValue);
|
||||
|
||||
if (!chatId || !result) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const {
|
||||
totalTokens,
|
||||
estimatedTotalTokens: totalTokens,
|
||||
messageHistoryTokens,
|
||||
codebaseTokens,
|
||||
mentionedAppsTokens,
|
||||
@@ -142,7 +126,9 @@ export function TokenBar({ chatId }: TokenBarProps) {
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
{error && <div className="text-red-500 text-xs mt-1">{error}</div>}
|
||||
{error && (
|
||||
<div className="text-red-500 text-xs mt-1">Failed to count tokens</div>
|
||||
)}
|
||||
{(!settings?.enableProSmartFilesContextMode ||
|
||||
!settings?.enableDyadPro) && (
|
||||
<div className="text-xs text-center text-muted-foreground mt-2">
|
||||
|
||||
Reference in New Issue
Block a user