diff --git a/drizzle/0017_sharp_corsair.sql b/drizzle/0017_sharp_corsair.sql new file mode 100644 index 0000000..19cea52 --- /dev/null +++ b/drizzle/0017_sharp_corsair.sql @@ -0,0 +1 @@ +ALTER TABLE `messages` ADD `max_tokens_used` integer; \ No newline at end of file diff --git a/drizzle/meta/0017_snapshot.json b/drizzle/meta/0017_snapshot.json new file mode 100644 index 0000000..7bcfc10 --- /dev/null +++ b/drizzle/meta/0017_snapshot.json @@ -0,0 +1,767 @@ +{ + "version": "6", + "dialect": "sqlite", + "id": "071199d7-dfb5-4681-85b7-228f1de3123a", + "prevId": "c0a49147-ac92-4046-afe8-42f20df9314b", + "tables": { + "apps": { + "name": "apps", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "path": { + "name": "path", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + }, + "github_org": { + "name": "github_org", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "github_repo": { + "name": "github_repo", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "github_branch": { + "name": "github_branch", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "supabase_project_id": { + "name": "supabase_project_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "supabase_parent_project_id": { + "name": "supabase_parent_project_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "neon_project_id": { + "name": "neon_project_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "neon_development_branch_id": { + "name": "neon_development_branch_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "neon_preview_branch_id": { + "name": "neon_preview_branch_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "vercel_project_id": { + "name": "vercel_project_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "vercel_project_name": { + "name": "vercel_project_name", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "vercel_team_id": { + "name": "vercel_team_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "vercel_deployment_url": { + "name": "vercel_deployment_url", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "install_command": { + "name": "install_command", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "start_command": { + "name": "start_command", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "chat_context": { + "name": "chat_context", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + 
"is_favorite": { + "name": "is_favorite", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "0" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "chats": { + "name": "chats", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "app_id": { + "name": "app_id", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "initial_commit_hash": { + "name": "initial_commit_hash", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + } + }, + "indexes": {}, + "foreignKeys": { + "chats_app_id_apps_id_fk": { + "name": "chats_app_id_apps_id_fk", + "tableFrom": "chats", + "tableTo": "apps", + "columnsFrom": [ + "app_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "language_model_providers": { + "name": "language_model_providers", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "api_base_url": { + "name": "api_base_url", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "env_var_name": { + "name": "env_var_name", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "language_models": { + "name": "language_models", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "display_name": { + "name": "display_name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "api_name": { + "name": "api_name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "builtin_provider_id": { + "name": "builtin_provider_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "custom_provider_id": { + "name": "custom_provider_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "max_output_tokens": { + "name": "max_output_tokens", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "context_window": { + "name": "context_window", + "type": "integer", + "primaryKey": false, 
+ "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + } + }, + "indexes": {}, + "foreignKeys": { + "language_models_custom_provider_id_language_model_providers_id_fk": { + "name": "language_models_custom_provider_id_language_model_providers_id_fk", + "tableFrom": "language_models", + "tableTo": "language_model_providers", + "columnsFrom": [ + "custom_provider_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "mcp_servers": { + "name": "mcp_servers", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "transport": { + "name": "transport", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "command": { + "name": "command", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "args": { + "name": "args", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "env_json": { + "name": "env_json", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "enabled": { + "name": "enabled", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "0" + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "mcp_tool_consents": { + "name": "mcp_tool_consents", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "server_id": { + "name": "server_id", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "tool_name": { + "name": "tool_name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "consent": { + "name": "consent", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'ask'" + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + } + }, + "indexes": { + "uniq_mcp_consent": { + "name": "uniq_mcp_consent", + "columns": [ + "server_id", + "tool_name" + ], + "isUnique": true + } + }, + "foreignKeys": { + "mcp_tool_consents_server_id_mcp_servers_id_fk": { + "name": "mcp_tool_consents_server_id_mcp_servers_id_fk", + "tableFrom": "mcp_tool_consents", + "tableTo": "mcp_servers", + "columnsFrom": [ + "server_id" + ], + 
"columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "messages": { + "name": "messages", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "chat_id": { + "name": "chat_id", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "role": { + "name": "role", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "approval_state": { + "name": "approval_state", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "source_commit_hash": { + "name": "source_commit_hash", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "commit_hash": { + "name": "commit_hash", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "request_id": { + "name": "request_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "max_tokens_used": { + "name": "max_tokens_used", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + } + }, + "indexes": {}, + "foreignKeys": { + "messages_chat_id_chats_id_fk": { + "name": "messages_chat_id_chats_id_fk", + "tableFrom": "messages", + "tableTo": "chats", + "columnsFrom": [ + "chat_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "prompts": { + "name": "prompts", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "versions": { + "name": "versions", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "app_id": { + "name": "app_id", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "commit_hash": { + "name": "commit_hash", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "neon_db_timestamp": { + "name": "neon_db_timestamp", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, 
+ "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + } + }, + "indexes": { + "versions_app_commit_unique": { + "name": "versions_app_commit_unique", + "columns": [ + "app_id", + "commit_hash" + ], + "isUnique": true + } + }, + "foreignKeys": { + "versions_app_id_apps_id_fk": { + "name": "versions_app_id_apps_id_fk", + "tableFrom": "versions", + "tableTo": "apps", + "columnsFrom": [ + "app_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + } + }, + "views": {}, + "enums": {}, + "_meta": { + "schemas": {}, + "tables": {}, + "columns": {} + }, + "internal": { + "indexes": {} + } +} diff --git a/drizzle/meta/_journal.json b/drizzle/meta/_journal.json index 6ba6f69..5baeffe 100644 --- a/drizzle/meta/_journal.json +++ b/drizzle/meta/_journal.json @@ -120,6 +120,13 @@ "when": 1762297039106, "tag": "0016_petite_thanos", "breakpoints": true + }, + { + "idx": 17, + "version": "6", + "when": 1764804624402, + "tag": "0017_sharp_corsair", + "breakpoints": true } ] } \ No newline at end of file diff --git a/e2e-tests/context_limit_banner.spec.ts b/e2e-tests/context_limit_banner.spec.ts new file mode 100644 index 0000000..32d0d7d --- /dev/null +++ b/e2e-tests/context_limit_banner.spec.ts @@ -0,0 +1,46 @@ +import { test, Timeout } from "./helpers/test_helper"; +import { expect } from "@playwright/test"; + +test("context limit banner appears and summarize works", async ({ po }) => { + await po.setUp(); + + // Send a message that triggers high token usage (110k tokens) + // With a default context window of 128k, this leaves only 18k tokens remaining + // which is below the 40k threshold to show the banner + await po.sendPrompt("tc=context-limit-response [high-tokens=110000]"); + + // Verify the context limit banner appears + const contextLimitBanner = po.page.getByTestId("context-limit-banner"); + await expect(contextLimitBanner).toBeVisible({ timeout: Timeout.MEDIUM }); + + // Verify banner text + await expect(contextLimitBanner).toContainText( + "You're close to the context limit for this chat.", + ); + + // Click the summarize button + await contextLimitBanner + .getByRole("button", { name: "Summarize into new chat" }) + .click(); + + // Wait for the new chat to load and message to complete + await po.waitForChatCompletion(); + + // Snapshot the messages in the new chat + await po.snapshotMessages(); +}); + +test("context limit banner does not appear when within limit", async ({ + po, +}) => { + await po.setUp(); + + // Send a message with low token usage (50k tokens) + // With a 128k context window, this leaves 78k tokens remaining + // which is above the 40k threshold - banner should NOT appear + await po.sendPrompt("tc=context-limit-response [high-tokens=50000]"); + + // Verify the context limit banner does NOT appear + const contextLimitBanner = po.page.getByTestId("context-limit-banner"); + await expect(contextLimitBanner).not.toBeVisible(); +}); diff --git a/e2e-tests/fixtures/context-limit-response.md b/e2e-tests/fixtures/context-limit-response.md new file mode 100644 index 0000000..4eb7451 --- /dev/null +++ b/e2e-tests/fixtures/context-limit-response.md @@ -0,0 +1,4 @@ +Here is a simple response to 
test the context limit banner functionality. + +This message simulates being close to the model's context window limit. + diff --git a/e2e-tests/snapshots/context_limit_banner.spec.ts_context-limit-banner-appears-and-summarize-works-1.aria.yml b/e2e-tests/snapshots/context_limit_banner.spec.ts_context-limit-banner-appears-and-summarize-works-1.aria.yml new file mode 100644 index 0000000..24096f1 --- /dev/null +++ b/e2e-tests/snapshots/context_limit_banner.spec.ts_context-limit-banner-appears-and-summarize-works-1.aria.yml @@ -0,0 +1,14 @@ +- paragraph: Summarize from chat-id=1 +- img +- text: file1.txt +- button "Edit": + - img +- img +- text: file1.txt +- paragraph: More EOM +- button: + - img +- img +- text: less than a minute ago +- button "Retry": + - img \ No newline at end of file diff --git a/src/components/ChatPanel.tsx b/src/components/ChatPanel.tsx index 6f24cc2..ee6e858 100644 --- a/src/components/ChatPanel.tsx +++ b/src/components/ChatPanel.tsx @@ -97,7 +97,11 @@ export function ChatPanel({ const streamCount = chatId ? (streamCountById.get(chatId) ?? 0) : 0; console.log("streamCount - scrolling to bottom", streamCount); scrollToBottom(); - }, [chatId, chatId ? (streamCountById.get(chatId) ?? 0) : 0]); + }, [ + chatId, + chatId ? (streamCountById.get(chatId) ?? 0) : 0, + chatId ? (isStreamingById.get(chatId) ?? false) : false, + ]); useEffect(() => { const container = messagesContainerRef.current; diff --git a/src/components/ModelPicker.tsx b/src/components/ModelPicker.tsx index 0354aa1..b8b2149 100644 --- a/src/components/ModelPicker.tsx +++ b/src/components/ModelPicker.tsx @@ -27,11 +27,17 @@ import { useSettings } from "@/hooks/useSettings"; import { PriceBadge } from "@/components/PriceBadge"; import { TURBO_MODELS } from "@/ipc/shared/language_model_constants"; import { cn } from "@/lib/utils"; +import { useQueryClient } from "@tanstack/react-query"; +import { TOKEN_COUNT_QUERY_KEY } from "@/hooks/useCountTokens"; export function ModelPicker() { const { settings, updateSettings } = useSettings(); + const queryClient = useQueryClient(); const onModelSelect = (model: LargeLanguageModel) => { updateSettings({ selectedModel: model }); + // Invalidate token count when model changes since different models have different context windows + // (technically they have different tokenizers, but we don't keep track of that). 
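+    // Invalidating the ["tokenCount"] key prefix refetches every
+    // ["tokenCount", chatId, input] query, so the token bar and the
+    // context-limit banner recompute against the new model's context window.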
+    queryClient.invalidateQueries({ queryKey: TOKEN_COUNT_QUERY_KEY });
   };

   const [open, setOpen] = useState(false);
diff --git a/src/components/chat/ChatInput.tsx b/src/components/chat/ChatInput.tsx
index 2b43302..e7f30c5 100644
--- a/src/components/chat/ChatInput.tsx
+++ b/src/components/chat/ChatInput.tsx
@@ -52,13 +52,14 @@ import {
   TooltipProvider,
   TooltipTrigger,
 } from "../ui/tooltip";
-import { useNavigate } from "@tanstack/react-router";
+
 import { useVersions } from "@/hooks/useVersions";
 import { useAttachments } from "@/hooks/useAttachments";
 import { AttachmentsList } from "./AttachmentsList";
 import { DragDropOverlay } from "./DragDropOverlay";
 import { FileAttachmentDropdown } from "./FileAttachmentDropdown";
-import { showError, showExtraFilesToast } from "@/lib/toast";
+import { showExtraFilesToast } from "@/lib/toast";
+import { useSummarizeInNewChat } from "./SummarizeInNewChatButton";
 import { ChatInputControls } from "../ChatInputControls";
 import { ChatErrorBox } from "./ChatErrorBox";
 import {
@@ -419,30 +420,10 @@ function SuggestionButton({
 }

 function SummarizeInNewChatButton() {
-  const chatId = useAtomValue(selectedChatIdAtom);
-  const appId = useAtomValue(selectedAppIdAtom);
-  const { streamMessage } = useStreamChat();
-  const navigate = useNavigate();
-  const onClick = async () => {
-    if (!appId) {
-      console.error("No app id found");
-      return;
-    }
-    try {
-      const newChatId = await IpcClient.getInstance().createChat(appId);
-      // navigate to new chat
-      await navigate({ to: "/chat", search: { id: newChatId } });
-      await streamMessage({
-        prompt: "Summarize from chat-id=" + chatId,
-        chatId: newChatId,
-      });
-    } catch (err) {
-      showError(err);
-    }
-  };
+  const { handleSummarize } = useSummarizeInNewChat();
   return (
     <Button onClick={handleSummarize}>
       Summarize to new chat
diff --git a/src/components/chat/ChatMessage.tsx b/src/components/chat/ChatMessage.tsx
index 86c70d1..99acc59 100644
--- a/src/components/chat/ChatMessage.tsx
+++ b/src/components/chat/ChatMessage.tsx
@@ -12,6 +12,7 @@ import {
   GitCommit,
   Copy,
   Check,
+  Info,
 } from "lucide-react";
 import { formatDistanceToNow, format } from "date-fns";
 import { useVersions } from "@/hooks/useVersions";
@@ -277,6 +278,20 @@ const ChatMessage = ({ message, isLastMessage }: ChatMessageProps) => {
               )}
+              {isLastMessage && message.totalTokens && (
+                <TooltipProvider>
+                  <Tooltip>
+                    <TooltipTrigger asChild>
+                      <span>
+                        <Info size={16} />
+                      </span>
+                    </TooltipTrigger>
+                    <TooltipContent>
+                      Max tokens used: {message.totalTokens.toLocaleString()}
+                    </TooltipContent>
+                  </Tooltip>
+                </TooltipProvider>
+              )}
             )}
diff --git a/src/components/chat/ContextLimitBanner.tsx b/src/components/chat/ContextLimitBanner.tsx
new file mode 100644
index 0000000..8c9cd23
--- /dev/null
+++ b/src/components/chat/ContextLimitBanner.tsx
@@ -0,0 +1,89 @@
+import { AlertTriangle, ArrowRight } from "lucide-react";
+import { Button } from "@/components/ui/button";
+import {
+  Tooltip,
+  TooltipContent,
+  TooltipTrigger,
+} from "@/components/ui/tooltip";
+import { useSummarizeInNewChat } from "./SummarizeInNewChatButton";
+
+const CONTEXT_LIMIT_THRESHOLD = 40_000;
+
+interface ContextLimitBannerProps {
+  totalTokens?: number | null;
+  contextWindow?: number;
+}
+
+function formatTokenCount(count: number): string {
+  if (count >= 1000) {
+    return `${(count / 1000).toFixed(1)}k`.replace(".0k", "k");
+  }
+  return count.toString();
+}
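+// For example (given the logic above): formatTokenCount(128_000) returns "128k",
+// formatTokenCount(18_500) returns "18.5k", and formatTokenCount(950) returns "950".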
+
+export function ContextLimitBanner({
+  totalTokens,
+  contextWindow,
+}: ContextLimitBannerProps) {
+  const { handleSummarize } = useSummarizeInNewChat();
+
+  // Don't show banner if we don't have the necessary data
+  if (!totalTokens || !contextWindow) {
+    return null;
+  }
+
+  // Check if we're within 40k tokens of the context limit
+  const tokensRemaining = contextWindow - totalTokens;
+  if (tokensRemaining > CONTEXT_LIMIT_THRESHOLD) {
+    return null;
+  }
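+
+  // Worked example, using the numbers from the e2e test: with a 128_000-token
+  // context window and totalTokens = 110_000, tokensRemaining is 18_000, which
+  // is below the 40_000 threshold, so the banner renders.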
+  return (
+    <div data-testid="context-limit-banner">
+      <div>
+        <Tooltip>
+          <TooltipTrigger asChild>
+            <AlertTriangle size={16} />
+          </TooltipTrigger>
+          <TooltipContent>
+            <div>
+              <span>Used:</span>
+              <span>{formatTokenCount(totalTokens)}</span>
+            </div>
+            <div>
+              <span>Limit:</span>
+              <span>{formatTokenCount(contextWindow)}</span>
+            </div>
+          </TooltipContent>
+        </Tooltip>
+      </div>
+      <div>
+        You're close to the context limit for this chat.
+      </div>
+      <Button onClick={handleSummarize}>
+        Summarize into new chat
+        <ArrowRight size={14} />
+      </Button>
+    </div>
+  );
+}
diff --git a/src/components/chat/MessagesList.tsx b/src/components/chat/MessagesList.tsx
index 61ca90f..92a52d6 100644
--- a/src/components/chat/MessagesList.tsx
+++ b/src/components/chat/MessagesList.tsx
@@ -18,6 +18,8 @@ import { useLanguageModelProviders } from "@/hooks/useLanguageModelProviders";
 import { useSettings } from "@/hooks/useSettings";
 import { useUserBudgetInfo } from "@/hooks/useUserBudgetInfo";
 import { PromoMessage } from "./PromoMessage";
+import { ContextLimitBanner } from "./ContextLimitBanner";
+import { useCountTokens } from "@/hooks/useCountTokens";

 interface MessagesListProps {
   messages: Message[];
@@ -36,6 +38,11 @@ export const MessagesList = forwardRef(
     const [isRetryLoading, setIsRetryLoading] = useState(false);
     const selectedChatId = useAtomValue(selectedChatIdAtom);
     const { userBudget } = useUserBudgetInfo();
+    // Only fetch token count when not streaming
+    const { result: tokenCountResult } = useCountTokens(
+      !isStreaming ? selectedChatId : null,
+      "",
+    );

     const renderSetupBanner = () => {
       const selectedModel = settings?.selectedModel;
@@ -73,6 +80,13 @@ export const MessagesList = forwardRef(
           )}
+          {/* Show context limit banner when close to token limit */}
+          {!isStreaming && tokenCountResult && (
+            <ContextLimitBanner
+              totalTokens={tokenCountResult.actualMaxTokens}
+              contextWindow={tokenCountResult.contextWindow}
+            />
+          )}
         {!isStreaming && (
{!!messages.length && diff --git a/src/components/chat/SummarizeInNewChatButton.tsx b/src/components/chat/SummarizeInNewChatButton.tsx new file mode 100644 index 0000000..345070a --- /dev/null +++ b/src/components/chat/SummarizeInNewChatButton.tsx @@ -0,0 +1,38 @@ +import { useNavigate } from "@tanstack/react-router"; +import { useAtomValue } from "jotai"; +import { selectedChatIdAtom } from "@/atoms/chatAtoms"; +import { selectedAppIdAtom } from "@/atoms/appAtoms"; +import { useStreamChat } from "@/hooks/useStreamChat"; +import { IpcClient } from "@/ipc/ipc_client"; +import { showError } from "@/lib/toast"; + +export function useSummarizeInNewChat() { + const chatId = useAtomValue(selectedChatIdAtom); + const appId = useAtomValue(selectedAppIdAtom); + const { streamMessage } = useStreamChat(); + const navigate = useNavigate(); + + const handleSummarize = async () => { + if (!appId) { + console.error("No app id found"); + return; + } + if (!chatId) { + console.error("No chat id found"); + return; + } + try { + const newChatId = await IpcClient.getInstance().createChat(appId); + // navigate to new chat + await navigate({ to: "/chat", search: { id: newChatId } }); + await streamMessage({ + prompt: "Summarize from chat-id=" + chatId, + chatId: newChatId, + }); + } catch (err) { + showError(err); + } + }; + + return { handleSummarize }; +} diff --git a/src/components/chat/TokenBar.tsx b/src/components/chat/TokenBar.tsx index be69b1a..58f5919 100644 --- a/src/components/chat/TokenBar.tsx +++ b/src/components/chat/TokenBar.tsx @@ -1,4 +1,4 @@ -import React, { useEffect, useState } from "react"; +import React from "react"; import { Tooltip, TooltipContent, @@ -24,31 +24,15 @@ interface TokenBarProps { export function TokenBar({ chatId }: TokenBarProps) { const [inputValue] = useAtom(chatInputValueAtom); - const { countTokens, result } = useCountTokens(); - const [error, setError] = useState(null); const { settings } = useSettings(); - useEffect(() => { - if (!chatId) return; - // Mark this as used, we need to re-trigger token count - // when selected model changes. - void settings?.selectedModel; - - const debounceTimer = setTimeout(() => { - countTokens(chatId, inputValue).catch((err) => { - setError("Failed to count tokens"); - console.error("Token counting error:", err); - }); - }, 500); - - return () => clearTimeout(debounceTimer); - }, [chatId, inputValue, countTokens, settings?.selectedModel]); + const { result, error } = useCountTokens(chatId ?? null, inputValue); if (!chatId || !result) { return null; } const { - totalTokens, + estimatedTotalTokens: totalTokens, messageHistoryTokens, codebaseTokens, mentionedAppsTokens, @@ -142,7 +126,9 @@ export function TokenBar({ chatId }: TokenBarProps) { - {error &&
<div>{error}</div>}
+      {error && (
+        <div>Failed to count tokens</div>
+      )}
       {(!settings?.enableProSmartFilesContextMode ||
         !settings?.enableDyadPro) && (
diff --git a/src/db/schema.ts b/src/db/schema.ts index dfc1985..0b97f77 100644 --- a/src/db/schema.ts +++ b/src/db/schema.ts @@ -77,6 +77,8 @@ export const messages = sqliteTable("messages", { // The commit hash of the codebase at the time the message was sent commitHash: text("commit_hash"), requestId: text("request_id"), + // Max tokens used for this message (only for assistant messages) + maxTokensUsed: integer("max_tokens_used"), createdAt: integer("created_at", { mode: "timestamp" }) .notNull() .default(sql`(unixepoch())`), diff --git a/src/hooks/useCountTokens.ts b/src/hooks/useCountTokens.ts index 9964fd0..6318382 100644 --- a/src/hooks/useCountTokens.ts +++ b/src/hooks/useCountTokens.ts @@ -1,43 +1,62 @@ -import { useCallback } from "react"; -import { atom, useAtom } from "jotai"; +import { + keepPreviousData, + useQuery, + useQueryClient, +} from "@tanstack/react-query"; import { IpcClient } from "@/ipc/ipc_client"; import type { TokenCountResult } from "@/ipc/ipc_types"; +import { useCallback, useEffect, useState } from "react"; -// Create atoms to store the token count state -export const tokenCountResultAtom = atom(null); -export const tokenCountLoadingAtom = atom(false); -export const tokenCountErrorAtom = atom(null); +export const TOKEN_COUNT_QUERY_KEY = ["tokenCount"] as const; -export function useCountTokens() { - const [result, setResult] = useAtom(tokenCountResultAtom); - const [loading, setLoading] = useAtom(tokenCountLoadingAtom); - const [error, setError] = useAtom(tokenCountErrorAtom); +export function useCountTokens(chatId: number | null, input: string = "") { + const queryClient = useQueryClient(); - const countTokens = useCallback( - async (chatId: number, input: string) => { - setLoading(true); - setError(null); + // Debounce input so we don't call the token counting IPC on every keystroke. + const [debouncedInput, setDebouncedInput] = useState(input); - try { - const ipcClient = IpcClient.getInstance(); - const tokenResult = await ipcClient.countTokens({ chatId, input }); - setResult(tokenResult); - return tokenResult; - } catch (error) { - console.error("Error counting tokens:", error); - setError(error instanceof Error ? 
error : new Error(String(error))); - throw error; - } finally { - setLoading(false); - } + useEffect(() => { + // If there's no chat, don't bother debouncing + if (chatId === null) { + setDebouncedInput(input); + return; + } + + const handle = setTimeout(() => { + setDebouncedInput(input); + }, 1_000); + + return () => clearTimeout(handle); + }, [chatId, input]); + + const { + data: result = null, + isLoading: loading, + error, + refetch, + } = useQuery({ + queryKey: [...TOKEN_COUNT_QUERY_KEY, chatId, debouncedInput], + queryFn: async () => { + if (chatId === null) return null; + return IpcClient.getInstance().countTokens({ + chatId, + input: debouncedInput, + }); }, - [setLoading, setError, setResult], - ); + placeholderData: keepPreviousData, + enabled: chatId !== null, + }); + + // For imperative invalidation (e.g., after streaming completes) + const invalidateTokenCount = useCallback(() => { + queryClient.invalidateQueries({ queryKey: TOKEN_COUNT_QUERY_KEY }); + }, [queryClient]); return { - countTokens, result, loading, error, + refetch, + invalidateTokenCount, }; } diff --git a/src/hooks/useStreamChat.ts b/src/hooks/useStreamChat.ts index 2dcc131..430dbaa 100644 --- a/src/hooks/useStreamChat.ts +++ b/src/hooks/useStreamChat.ts @@ -49,7 +49,6 @@ export function useStreamChat({ const setStreamCountById = useSetAtom(chatStreamCountByIdAtom); const { refreshVersions } = useVersions(selectedAppId); const { refreshAppIframe } = useRunApp(); - const { countTokens } = useCountTokens(); const { refetchUserBudget } = useUserBudgetInfo(); const { checkProblems } = useCheckProblems(selectedAppId); const { settings } = useSettings(); @@ -62,6 +61,7 @@ export function useStreamChat({ chatId = id; } let { refreshProposal } = hasChatId ? useProposal(chatId) : useProposal(); + const { invalidateTokenCount } = useCountTokens(chatId ?? null, ""); const streamMessage = useCallback( async ({ @@ -154,7 +154,7 @@ export function useStreamChat({ refreshChats(); refreshApp(); refreshVersions(); - countTokens(chatId, ""); + invalidateTokenCount(); onSettled?.(); }, onError: (errorMessage: string) => { @@ -174,7 +174,7 @@ export function useStreamChat({ refreshChats(); refreshApp(); refreshVersions(); - countTokens(chatId, ""); + invalidateTokenCount(); onSettled?.(); }, }); diff --git a/src/ipc/handlers/chat_stream_handlers.ts b/src/ipc/handlers/chat_stream_handlers.ts index eeaf6d6..8134f17 100644 --- a/src/ipc/handlers/chat_stream_handlers.ts +++ b/src/ipc/handlers/chat_stream_handlers.ts @@ -448,6 +448,7 @@ ${componentSnippet} }); let fullResponse = ""; + let maxTokensUsed: number | undefined; // Check if this is a test prompt const testResponse = getTestResponse(req.prompt); @@ -885,7 +886,7 @@ This conversation includes one or more image attachments. When the user uploads } satisfies GoogleGenerativeAIProviderOptions; } - return streamText({ + const streamResult = streamText({ headers: isAnthropic ? { "anthropic-beta": "context-1m-2025-08-07", @@ -900,6 +901,33 @@ This conversation includes one or more image attachments. When the user uploads system: systemPromptOverride, tools, messages: chatMessages.filter((m) => m.content), + onFinish: (response) => { + const totalTokens = response.usage?.totalTokens; + + if (typeof totalTokens === "number") { + // We use the highest total tokens used (we are *not* accumulating) + // since we're trying to figure it out if we're near the context limit. + maxTokensUsed = Math.max(maxTokensUsed ?? 
0, totalTokens); + + // Persist the aggregated token usage on the placeholder assistant message + void db + .update(messages) + .set({ maxTokensUsed: maxTokensUsed }) + .where(eq(messages.id, placeholderAssistantMessage.id)) + .catch((error) => { + logger.error( + "Failed to save total tokens for assistant message", + error, + ); + }); + + logger.log( + `Total tokens used (aggregated for message ${placeholderAssistantMessage.id}): ${maxTokensUsed}`, + ); + } else { + logger.log("Total tokens used: unknown"); + } + }, onError: (error: any) => { let errorMessage = (error as any)?.error?.message; const responseBody = error?.error?.responseBody; @@ -923,6 +951,10 @@ This conversation includes one or more image attachments. When the user uploads }, abortSignal: abortController.signal, }); + return { + fullStream: streamResult.fullStream, + usage: streamResult.usage, + }; }; let lastDbSaveAt = 0; diff --git a/src/ipc/handlers/token_count_handlers.ts b/src/ipc/handlers/token_count_handlers.ts index 6934941..240bc50 100644 --- a/src/ipc/handlers/token_count_handlers.ts +++ b/src/ipc/handlers/token_count_handlers.ts @@ -141,8 +141,15 @@ export function registerTokenCountHandlers() { codebaseTokens + mentionedAppsTokens; + // Find the last assistant message since totalTokens is only set on assistant messages + const lastAssistantMessage = [...chat.messages] + .reverse() + .find((m) => m.role === "assistant"); + const actualMaxTokens = lastAssistantMessage?.maxTokensUsed ?? null; + return { - totalTokens, + estimatedTotalTokens: totalTokens, + actualMaxTokens, messageHistoryTokens, codebaseTokens, mentionedAppsTokens, diff --git a/src/ipc/ipc_types.ts b/src/ipc/ipc_types.ts index 16f2f33..917e226 100644 --- a/src/ipc/ipc_types.ts +++ b/src/ipc/ipc_types.ts @@ -49,6 +49,8 @@ export interface ChatResponseEnd { updatedFiles: boolean; extraFiles?: string[]; extraFilesError?: string; + totalTokens?: number; + contextWindow?: number; } export interface ChatProblemsEvent { @@ -81,6 +83,7 @@ export interface Message { dbTimestamp?: string | null; createdAt?: Date | string; requestId?: string | null; + totalTokens?: number | null; } export interface Chat { @@ -167,7 +170,8 @@ export interface TokenCountParams { } export interface TokenCountResult { - totalTokens: number; + estimatedTotalTokens: number; + actualMaxTokens: number | null; messageHistoryTokens: number; codebaseTokens: number; mentionedAppsTokens: number; diff --git a/src/ipc/shared/language_model_constants.ts b/src/ipc/shared/language_model_constants.ts index 7cb7387..f36987d 100644 --- a/src/ipc/shared/language_model_constants.ts +++ b/src/ipc/shared/language_model_constants.ts @@ -306,11 +306,9 @@ export const MODEL_OPTIONS: Record = { displayName: "Auto", description: "Automatically selects the best model", tag: "Default", - // These are below Gemini 2.5 Pro & Flash limits - // which are the ones defaulted to for both regular auto - // and smart auto. + // The following is reasonable defaults. maxOutputTokens: 32_000, - contextWindow: 1_000_000, + contextWindow: 200_000, temperature: 0, }, { diff --git a/src/prompts/summarize_chat_system_prompt.ts b/src/prompts/summarize_chat_system_prompt.ts index e399ebc..1b60a47 100644 --- a/src/prompts/summarize_chat_system_prompt.ts +++ b/src/prompts/summarize_chat_system_prompt.ts @@ -1,8 +1,42 @@ export const SUMMARIZE_CHAT_SYSTEM_PROMPT = ` -You are a helpful assistant that understands long conversations and can summarize them in a few bullet points. 
+You are a helpful assistant that summarizes AI coding chat sessions with a focus on technical changes and file modifications.

-I want you to write down the gist of the conversation in a few bullet points, focusing on the major changes, particularly
-at the end of the conversation.
+Your task is to analyze the conversation and provide:

-Use <dyad-chat-summary> for setting the chat summary (put this at the end). The chat summary should be less than a sentence, but more than a few words. YOU SHOULD ALWAYS INCLUDE EXACTLY ONE CHAT TITLE
+1. **Chat Summary**: A concise summary (less than a sentence, more than a few words) that captures the primary objective or outcome of the session.
+
+2. **Major Changes**: Identify and highlight:
+   - Major code modifications, refactors, or new features implemented
+   - Critical bug fixes or debugging sessions
+   - Architecture or design pattern changes
+   - Important decisions made during the conversation
+
+3. **Relevant Files**: List the most important files discussed or modified, with brief context:
+   - Files that received significant changes
+   - New files created
+   - Files central to the discussion or problem-solving
+   - Format: \`path/to/file.ext - brief description of changes\`
+
+4. **Focus on Recency**: Prioritize changes and discussions from the latter part of the conversation, as these typically represent the final state or most recent decisions.
+
+**Output Format:**
+
+## Major Changes
+- Bullet point of significant change 1
+- Bullet point of significant change 2
+
+## Important Context
+- Any critical decisions, trade-offs, or next steps discussed
+
+## Relevant Files
+- \`file1.ts\` - Description of changes
+- \`file2.py\` - Description of changes
+
+<dyad-chat-summary>
+[Your concise summary here - less than a sentence, more than a few words]
+</dyad-chat-summary>
+
+**Reminder:**
+
+YOU MUST ALWAYS INCLUDE EXACTLY ONE <dyad-chat-summary> TAG AT THE END.
 `;
diff --git a/testing/fake-llm-server/chatCompletionHandler.ts b/testing/fake-llm-server/chatCompletionHandler.ts
index 32b2f4a..3ea25c8 100644
--- a/testing/fake-llm-server/chatCompletionHandler.ts
+++ b/testing/fake-llm-server/chatCompletionHandler.ts
@@ -371,6 +371,15 @@
     return;
   }

+  // Check for high token usage marker to simulate near context limit
+  const highTokensMatch =
+    typeof lastMessage?.content === "string" &&
+    !lastMessage?.content.startsWith("Summarize the following chat:") &&
+    lastMessage?.content?.match?.(/\[high-tokens=(\d+)\]/);
+  const highTokensValue = highTokensMatch
+    ? parseInt(highTokensMatch[1], 10)
+    : null;
+
   // Split the message into characters to simulate streaming
   const messageChars = messageContent.split("");

@@ -388,8 +397,15 @@
       res.write(createStreamChunk(batch));
       index += batchSize;
     } else {
-      // Send the final chunk
-      res.write(createStreamChunk("", "assistant", true));
+      // Send the final chunk with optional usage info for high token simulation
+      const usage = highTokensValue
+        ? {
+            prompt_tokens: highTokensValue - 100,
+            completion_tokens: 100,
+            total_tokens: highTokensValue,
+          }
+        : undefined;
+      res.write(createStreamChunk("", "assistant", true, usage));
       clearInterval(interval);
       res.end();
     }
diff --git a/testing/fake-llm-server/index.ts b/testing/fake-llm-server/index.ts
index c5fa243..1b4c30a 100644
--- a/testing/fake-llm-server/index.ts
+++ b/testing/fake-llm-server/index.ts
@@ -29,8 +29,13 @@ export function createStreamChunk(
   content: string,
   role: string = "assistant",
   isLast: boolean = false,
+  usage?: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  },
 ) {
-  const chunk = {
+  const chunk: any = {
     id: `chatcmpl-${Date.now()}`,
     object: "chat.completion.chunk",
     created: Math.floor(Date.now() / 1000),
@@ -44,6 +49,11 @@
     ],
   };

+  // Add usage info to the final chunk if provided
+  if (isLast && usage) {
+    chunk.usage = usage;
+  }
+
   return `data: ${JSON.stringify(chunk)}\n\n${isLast ? "data: [DONE]\n\n" : ""}`;
 }
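+
+// Example final SSE frames for `[high-tokens=110000]` (a sketch; the elided
+// `choices` payload keeps the existing chunk format):
+//   data: {"id":"chatcmpl-...","object":"chat.completion.chunk","created":...,
+//          "choices":[...],"usage":{"prompt_tokens":109900,"completion_tokens":100,"total_tokens":110000}}
+//   data: [DONE]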