Summarize chat trigger (#1890)

<!-- CURSOR_SUMMARY -->
> [!NOTE]
> Adds a context-limit banner with one-click “summarize into new chat,” refactors token counting with react-query, and persists per-message max token usage.
>
> - **Chat UX**
>   - **Context limit banner** (`ContextLimitBanner.tsx`, `MessagesList.tsx`): shown when within 40k tokens of `contextWindow`, with a tooltip and an action to summarize into a new chat.
>   - **Summarize flow**: extracted to `useSummarizeInNewChat` and used in the chat input and the banner; new summarize system prompt (`summarize_chat_system_prompt.ts`).
> - **Token usage & counting**
>   - **Persist max tokens used per assistant message**: DB migration (`messages.max_tokens_used`), schema updates, and saving usage during streaming (`chat_stream_handlers.ts`).
>   - **Token counting refactor** (`useCountTokens.ts`): react-query with debounce; returns `estimatedTotalTokens` and `actualMaxTokens`; invalidated on model change and at stream end; `TokenBar` updated.
>   - **Surfacing usage**: tooltip on the latest assistant message shows total tokens (`ChatMessage.tsx`).
> - **Model/config tweaks**
>   - Set the `auto` model `contextWindow` to `200_000` (`language_model_constants.ts`).
>   - Improve the chat auto-scroll dependency array (`ChatPanel.tsx`).
>   - Fix the app path validation regex (`app_handlers.ts`).
> - **Testing & dev server**
>   - E2E tests for the banner and summarize flow (`e2e-tests/context_limit_banner.spec.ts`, plus fixtures and a snapshot).
>   - Fake LLM server streams usage to simulate high-token scenarios (`testing/fake-llm-server/*`).
>
> <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 2ae16a14d50699cc772407426419192c2fdf2ec3. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->

<!-- This is an auto-generated description by cubic. -->
---
## Summary by cubic

Adds a “Summarize into new chat” trigger and a context limit banner to help keep conversations focused and avoid hitting model limits. Also tracks and surfaces actual token usage per assistant message, with a token-counting refactor for reliability.

- **New Features**
  - Summarize into a new chat from the input or the banner; improved system prompt with a clear output format.
  - Context limit banner appears when within 40k tokens of the model’s context window and offers a one-click summarize action.
  - Tooltip on the latest assistant message shows total tokens used.

- **Refactors**
  - Token counting now uses react-query and returns `estimatedTotalTokens` and `actualMaxTokens`; counts are invalidated on model change and when streaming settles.
  - Persist per-message `max_tokens_used` in the messages table; the backend aggregates model usage during streaming and saves it (see the schema sketch after this list).
  - Adjusted the default “Auto” model `contextWindow` to 200k for more realistic limits.
  - Improved chat scrolling while streaming; fixed the app path validation regex.
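
Neither summary shows the actual schema change or backend aggregation. A minimal sketch of the schema side, assuming a Drizzle ORM SQLite schema (only the `max_tokens_used` column on `messages` is confirmed by this PR; every other column here is illustrative):

```ts
// schema.ts — hypothetical sketch; only max_tokens_used is confirmed by the PR summaries.
import { sqliteTable, integer, text } from "drizzle-orm/sqlite-core";

export const messages = sqliteTable("messages", {
  id: integer("id").primaryKey({ autoIncrement: true }),
  role: text("role").notNull(),
  content: text("content").notNull(),
  // New: peak token usage the model reported while streaming this message.
  // Nullable so existing rows and user messages remain valid without backfill.
  maxTokensUsed: integer("max_tokens_used"),
});
```

The corresponding migration would then be a single additive statement along the lines of `ALTER TABLE messages ADD COLUMN max_tokens_used integer;`, which stays backward-compatible with existing rows.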

<sup>Written for commit 2ae16a14d50699cc772407426419192c2fdf2ec3. Summary will update automatically on new commits.</sup>

<!-- End of auto-generated description by cubic. -->
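
The Cursor summary above also notes that the fake LLM server now streams usage to simulate high-token scenarios. A minimal sketch of what that could look like, assuming an OpenAI-compatible streaming endpoint (field names follow that spec; the actual code in `testing/fake-llm-server/*` is not shown on this page):

```ts
// Hypothetical fake-LLM SSE handler: the final chunk carries inflated usage so the
// client records a max-tokens-used value near the context limit. Port is arbitrary.
import { createServer } from "node:http";

createServer((_req, res) => {
  res.writeHead(200, { "Content-Type": "text/event-stream" });
  const finalChunk = {
    object: "chat.completion.chunk",
    choices: [{ index: 0, delta: { content: "ok" }, finish_reason: "stop" }],
    // Large totals simulate a chat that is close to the context window.
    usage: { prompt_tokens: 180_000, completion_tokens: 500, total_tokens: 180_500 },
  };
  res.write(`data: ${JSON.stringify(finalChunk)}\n\n`);
  res.write("data: [DONE]\n\n");
  res.end();
}).listen(3500);
```
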
Commit 6235f7bb9d (parent 90c5805b57) by Will Chen, 2025-12-04 23:00:28 -08:00, committed by GitHub.
24 changed files with 1185 additions and 91 deletions.

**ChatPanel.tsx**

```diff
@@ -97,7 +97,11 @@ export function ChatPanel({
     const streamCount = chatId ? (streamCountById.get(chatId) ?? 0) : 0;
     console.log("streamCount - scrolling to bottom", streamCount);
     scrollToBottom();
-  }, [chatId, chatId ? (streamCountById.get(chatId) ?? 0) : 0]);
+  }, [
+    chatId,
+    chatId ? (streamCountById.get(chatId) ?? 0) : 0,
+    chatId ? (isStreamingById.get(chatId) ?? false) : false,
+  ]);
 
   useEffect(() => {
     const container = messagesContainerRef.current;
```

**ModelPicker.tsx**

```diff
@@ -27,11 +27,17 @@ import { useSettings } from "@/hooks/useSettings";
 import { PriceBadge } from "@/components/PriceBadge";
 import { TURBO_MODELS } from "@/ipc/shared/language_model_constants";
 import { cn } from "@/lib/utils";
+import { useQueryClient } from "@tanstack/react-query";
+import { TOKEN_COUNT_QUERY_KEY } from "@/hooks/useCountTokens";
 
 export function ModelPicker() {
   const { settings, updateSettings } = useSettings();
+  const queryClient = useQueryClient();
   const onModelSelect = (model: LargeLanguageModel) => {
     updateSettings({ selectedModel: model });
+    // Invalidate token count when model changes since different models have different context windows
+    // (technically they have different tokenizers, but we don't keep track of that).
+    queryClient.invalidateQueries({ queryKey: TOKEN_COUNT_QUERY_KEY });
   };
 
   const [open, setOpen] = useState(false);
```
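
For this invalidation to catch every per-chat count, `TOKEN_COUNT_QUERY_KEY` presumably acts as the common prefix of each token-count query key. Only the export name is confirmed by the import above; the value and the prefix convention are assumptions (a fuller hook sketch appears at the end of this page):

```ts
// useCountTokens.ts (assumed shape): a stable prefix shared by all token-count queries.
export const TOKEN_COUNT_QUERY_KEY = ["countTokens"] as const;

// Individual queries extend the prefix, e.g. ["countTokens", chatId, input], so
// queryClient.invalidateQueries({ queryKey: TOKEN_COUNT_QUERY_KEY }) matches all of them.
```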

**ChatInput.tsx**

```diff
@@ -52,13 +52,14 @@ import {
   TooltipProvider,
   TooltipTrigger,
 } from "../ui/tooltip";
-import { useNavigate } from "@tanstack/react-router";
 import { useVersions } from "@/hooks/useVersions";
 import { useAttachments } from "@/hooks/useAttachments";
 import { AttachmentsList } from "./AttachmentsList";
 import { DragDropOverlay } from "./DragDropOverlay";
 import { FileAttachmentDropdown } from "./FileAttachmentDropdown";
-import { showError, showExtraFilesToast } from "@/lib/toast";
+import { showExtraFilesToast } from "@/lib/toast";
+import { useSummarizeInNewChat } from "./SummarizeInNewChatButton";
 import { ChatInputControls } from "../ChatInputControls";
 import { ChatErrorBox } from "./ChatErrorBox";
 import {
@@ -419,30 +420,10 @@
 }
 
 function SummarizeInNewChatButton() {
-  const chatId = useAtomValue(selectedChatIdAtom);
-  const appId = useAtomValue(selectedAppIdAtom);
-  const { streamMessage } = useStreamChat();
-  const navigate = useNavigate();
-  const onClick = async () => {
-    if (!appId) {
-      console.error("No app id found");
-      return;
-    }
-    try {
-      const newChatId = await IpcClient.getInstance().createChat(appId);
-      // navigate to new chat
-      await navigate({ to: "/chat", search: { id: newChatId } });
-      await streamMessage({
-        prompt: "Summarize from chat-id=" + chatId,
-        chatId: newChatId,
-      });
-    } catch (err) {
-      showError(err);
-    }
-  };
+  const { handleSummarize } = useSummarizeInNewChat();
   return (
     <SuggestionButton
-      onClick={onClick}
+      onClick={handleSummarize}
       tooltipText="Creating a new chat makes the AI more focused and efficient"
     >
       Summarize to new chat
```

**ChatMessage.tsx**

```diff
@@ -12,6 +12,7 @@ import {
   GitCommit,
   Copy,
   Check,
+  Info,
 } from "lucide-react";
 import { formatDistanceToNow, format } from "date-fns";
 import { useVersions } from "@/hooks/useVersions";
@@ -277,6 +278,20 @@ const ChatMessage = ({ message, isLastMessage }: ChatMessageProps) => {
            </Tooltip>
          </TooltipProvider>
        )}
+        {isLastMessage && message.totalTokens && (
+          <TooltipProvider>
+            <Tooltip>
+              <TooltipTrigger asChild>
+                <div className="flex items-center space-x-1 px-1 py-0.5">
+                  <Info className="h-3 w-3" />
+                </div>
+              </TooltipTrigger>
+              <TooltipContent>
+                Max tokens used: {message.totalTokens.toLocaleString()}
+              </TooltipContent>
+            </Tooltip>
+          </TooltipProvider>
+        )}
      </div>
    )}
  </div>
```

**ContextLimitBanner.tsx** (new file)

```diff
@@ -0,0 +1,89 @@
+import { AlertTriangle, ArrowRight } from "lucide-react";
+import { Button } from "@/components/ui/button";
+import {
+  Tooltip,
+  TooltipContent,
+  TooltipTrigger,
+} from "@/components/ui/tooltip";
+import { useSummarizeInNewChat } from "./SummarizeInNewChatButton";
+
+const CONTEXT_LIMIT_THRESHOLD = 40_000;
+
+interface ContextLimitBannerProps {
+  totalTokens?: number | null;
+  contextWindow?: number;
+}
+
+function formatTokenCount(count: number): string {
+  if (count >= 1000) {
+    return `${(count / 1000).toFixed(1)}k`.replace(".0k", "k");
+  }
+  return count.toString();
+}
+
+export function ContextLimitBanner({
+  totalTokens,
+  contextWindow,
+}: ContextLimitBannerProps) {
+  const { handleSummarize } = useSummarizeInNewChat();
+
+  // Don't show banner if we don't have the necessary data
+  if (!totalTokens || !contextWindow) {
+    return null;
+  }
+
+  // Check if we're within 40k tokens of the context limit
+  const tokensRemaining = contextWindow - totalTokens;
+  if (tokensRemaining > CONTEXT_LIMIT_THRESHOLD) {
+    return null;
+  }
+
+  return (
+    <div
+      className="mx-auto max-w-3xl my-3 p-2 rounded-lg border border-amber-500/30 bg-amber-500/10 flex flex-col gap-2"
+      data-testid="context-limit-banner"
+    >
+      <div className="flex items-center gap-2 text-amber-600 dark:text-amber-400">
+        <Tooltip>
+          <TooltipTrigger asChild>
+            <Button
+              variant="ghost"
+              size="icon"
+              className="h-5 w-5 p-0 hover:bg-transparent text-amber-600 dark:text-amber-400 cursor-help"
+            >
+              <AlertTriangle className="h-4 w-4 shrink-0" />
+            </Button>
+          </TooltipTrigger>
+          <TooltipContent className="w-auto p-2 text-xs" side="top">
+            <div className="grid gap-1">
+              <div className="flex justify-between gap-4">
+                <span>Used:</span>
+                <span className="font-medium">
+                  {formatTokenCount(totalTokens)}
+                </span>
+              </div>
+              <div className="flex justify-between gap-4">
+                <span>Limit:</span>
+                <span className="font-medium">
+                  {formatTokenCount(contextWindow)}
+                </span>
+              </div>
+            </div>
+          </TooltipContent>
+        </Tooltip>
+        <p className="text-sm font-medium">
+          You're close to the context limit for this chat.
+        </p>
+      </div>
+      <Button
+        onClick={handleSummarize}
+        variant="outline"
+        size="sm"
+        className="h-8 border-amber-500/50 hover:bg-amber-500/20 hover:border-amber-500 text-amber-600 dark:text-amber-400"
+      >
+        Summarize into new chat
+        <ArrowRight className="h-3 w-3 ml-2" />
+      </Button>
+    </div>
+  );
+}
```
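
Both summaries mention e2e coverage in `e2e-tests/context_limit_banner.spec.ts`, and the `data-testid="context-limit-banner"` attribute above is the natural hook for it. A minimal Playwright-style sketch (Playwright is assumed from the `*.spec.ts` naming; the URL and setup are placeholders, and the real suite may drive the app differently):

```ts
import { test, expect } from "@playwright/test";

test("shows context limit banner near the context window", async ({ page }) => {
  // Assumes the app is running against the fake LLM server that streams high usage.
  await page.goto("http://localhost:5173/chat?id=1");
  const banner = page.getByTestId("context-limit-banner");
  await expect(banner).toBeVisible();

  // One-click summarize should create a new chat and navigate to it.
  await banner.getByRole("button", { name: "Summarize into new chat" }).click();
  await expect(page).toHaveURL(/\/chat\?id=\d+/);
});
```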

**MessagesList.tsx**

```diff
@@ -18,6 +18,8 @@ import { useLanguageModelProviders } from "@/hooks/useLanguageModelProviders";
 import { useSettings } from "@/hooks/useSettings";
 import { useUserBudgetInfo } from "@/hooks/useUserBudgetInfo";
 import { PromoMessage } from "./PromoMessage";
+import { ContextLimitBanner } from "./ContextLimitBanner";
+import { useCountTokens } from "@/hooks/useCountTokens";
 
 interface MessagesListProps {
   messages: Message[];
@@ -36,6 +38,11 @@ export const MessagesList = forwardRef<HTMLDivElement, MessagesListProps>(
   const [isRetryLoading, setIsRetryLoading] = useState(false);
   const selectedChatId = useAtomValue(selectedChatIdAtom);
   const { userBudget } = useUserBudgetInfo();
+  // Only fetch token count when not streaming
+  const { result: tokenCountResult } = useCountTokens(
+    !isStreaming ? selectedChatId : null,
+    "",
+  );
 
   const renderSetupBanner = () => {
     const selectedModel = settings?.selectedModel;
@@ -73,6 +80,13 @@
            </div>
          </div>
        )}
+        {/* Show context limit banner when close to token limit */}
+        {!isStreaming && tokenCountResult && (
+          <ContextLimitBanner
+            totalTokens={tokenCountResult.actualMaxTokens}
+            contextWindow={tokenCountResult.contextWindow}
+          />
+        )}
        {!isStreaming && (
          <div className="flex max-w-3xl mx-auto gap-2">
            {!!messages.length &&
```

**SummarizeInNewChatButton.tsx** (new file)

```diff
@@ -0,0 +1,38 @@
+import { useNavigate } from "@tanstack/react-router";
+import { useAtomValue } from "jotai";
+import { selectedChatIdAtom } from "@/atoms/chatAtoms";
+import { selectedAppIdAtom } from "@/atoms/appAtoms";
+import { useStreamChat } from "@/hooks/useStreamChat";
+import { IpcClient } from "@/ipc/ipc_client";
+import { showError } from "@/lib/toast";
+
+export function useSummarizeInNewChat() {
+  const chatId = useAtomValue(selectedChatIdAtom);
+  const appId = useAtomValue(selectedAppIdAtom);
+  const { streamMessage } = useStreamChat();
+  const navigate = useNavigate();
+
+  const handleSummarize = async () => {
+    if (!appId) {
+      console.error("No app id found");
+      return;
+    }
+    if (!chatId) {
+      console.error("No chat id found");
+      return;
+    }
+    try {
+      const newChatId = await IpcClient.getInstance().createChat(appId);
+      // navigate to new chat
+      await navigate({ to: "/chat", search: { id: newChatId } });
+      await streamMessage({
+        prompt: "Summarize from chat-id=" + chatId,
+        chatId: newChatId,
+      });
+    } catch (err) {
+      showError(err);
+    }
+  };
+
+  return { handleSummarize };
+}
```

**TokenBar.tsx**

```diff
@@ -1,4 +1,4 @@
-import React, { useEffect, useState } from "react";
+import React from "react";
 import {
   Tooltip,
   TooltipContent,
@@ -24,31 +24,15 @@ interface TokenBarProps {
 export function TokenBar({ chatId }: TokenBarProps) {
   const [inputValue] = useAtom(chatInputValueAtom);
-  const { countTokens, result } = useCountTokens();
-  const [error, setError] = useState<string | null>(null);
   const { settings } = useSettings();
-
-  useEffect(() => {
-    if (!chatId) return;
-    // Mark this as used, we need to re-trigger token count
-    // when selected model changes.
-    void settings?.selectedModel;
-    const debounceTimer = setTimeout(() => {
-      countTokens(chatId, inputValue).catch((err) => {
-        setError("Failed to count tokens");
-        console.error("Token counting error:", err);
-      });
-    }, 500);
-    return () => clearTimeout(debounceTimer);
-  }, [chatId, inputValue, countTokens, settings?.selectedModel]);
+  const { result, error } = useCountTokens(chatId ?? null, inputValue);
 
   if (!chatId || !result) {
     return null;
   }
 
   const {
-    totalTokens,
+    estimatedTotalTokens: totalTokens,
     messageHistoryTokens,
    codebaseTokens,
    mentionedAppsTokens,
@@ -142,7 +126,9 @@ export function TokenBar({ chatId }: TokenBarProps) {
          </TooltipContent>
        </Tooltip>
      </TooltipProvider>
-      {error && <div className="text-red-500 text-xs mt-1">{error}</div>}
+      {error && (
+        <div className="text-red-500 text-xs mt-1">Failed to count tokens</div>
+      )}
      {(!settings?.enableProSmartFilesContextMode ||
        !settings?.enableDyadPro) && (
        <div className="text-xs text-center text-muted-foreground mt-2">
```