Summarize chat trigger (#1890)
<!-- CURSOR_SUMMARY -->
> [!NOTE]
> Adds a context-limit banner with one-click “summarize into new chat,” refactors token counting with react-query, and persists per-message max token usage.
>
> - **Chat UX**
>   - **Context limit banner** (`ContextLimitBanner.tsx`, `MessagesList.tsx`): shows when within 40k tokens of `contextWindow`, with tooltip and action to summarize into a new chat.
>   - **Summarize flow**: extracted to `useSummarizeInNewChat` and used in chat input and banner; new summarize system prompt (`summarize_chat_system_prompt.ts`).
> - **Token usage & counting**
>   - **Persist max tokens used per assistant message**: DB migration (`messages.max_tokens_used`), schema updates, and saving usage during streaming (`chat_stream_handlers.ts`).
>   - **Token counting refactor** (`useCountTokens.ts`): react-query with debounce; returns `estimatedTotalTokens` and `actualMaxTokens`; invalidated on model change and stream end; `TokenBar` updated.
>   - **Surfacing usage**: tooltip on latest assistant message shows total tokens (`ChatMessage.tsx`).
> - **Model/config tweaks**
>   - Set `auto` model `contextWindow` to `200_000` (`language_model_constants.ts`).
>   - Improve chat auto-scroll dependency (`ChatPanel.tsx`).
>   - Fix app path validation regex (`app_handlers.ts`).
> - **Testing & dev server**
>   - E2E tests for banner and summarize (`e2e-tests/context_limit_banner.spec.ts` + fixtures/snapshot).
>   - Fake LLM server streams usage to simulate high token scenarios (`testing/fake-llm-server/*`).
>
> <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 2ae16a14d50699cc772407426419192c2fdf2ec3. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->

<!-- This is an auto-generated description by cubic. -->

---

## Summary by cubic

Adds a “Summarize into new chat” trigger and a context limit banner to help keep conversations focused and avoid hitting model limits. Also tracks and surfaces actual token usage per assistant message, with a token counting refactor for reliability.

- **New Features**
  - Summarize into new chat from the input or banner; improved system prompt with clear output format.
  - Context limit banner shows when within 40k tokens of the model’s context window and offers a one-click summarize action.
  - Tooltip on the latest assistant message shows total tokens used.
- **Refactors**
  - Token counting now uses react-query and returns estimatedTotalTokens and actualMaxTokens; counts are invalidated on model change and when streaming settles.
  - Persist per-message max_tokens_used in the messages table; backend aggregates model usage during streaming and saves it.
  - Adjusted default “Auto” model contextWindow to 200k for more realistic limits.
  - Improved chat scrolling while streaming; fixed app path validation regex.

<sup>Written for commit 2ae16a14d50699cc772407426419192c2fdf2ec3. Summary will update automatically on new commits.</sup>

<!-- End of auto-generated description by cubic. -->
This commit is contained in:
1
drizzle/0017_sharp_corsair.sql
Normal file
1
drizzle/0017_sharp_corsair.sql
Normal file
@@ -0,0 +1 @@
|
|||||||
|
ALTER TABLE `messages` ADD `max_tokens_used` integer;
|
||||||
767
drizzle/meta/0017_snapshot.json
Normal file
767
drizzle/meta/0017_snapshot.json
Normal file
@@ -0,0 +1,767 @@
|
|||||||
|
{
|
||||||
|
"version": "6",
|
||||||
|
"dialect": "sqlite",
|
||||||
|
"id": "071199d7-dfb5-4681-85b7-228f1de3123a",
|
||||||
|
"prevId": "c0a49147-ac92-4046-afe8-42f20df9314b",
|
||||||
|
"tables": {
|
||||||
|
"apps": {
|
||||||
|
"name": "apps",
|
||||||
|
"columns": {
|
||||||
|
"id": {
|
||||||
|
"name": "id",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": true
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"name": "name",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"path": {
|
||||||
|
"name": "path",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"name": "created_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "(unixepoch())"
|
||||||
|
},
|
||||||
|
"updated_at": {
|
||||||
|
"name": "updated_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "(unixepoch())"
|
||||||
|
},
|
||||||
|
"github_org": {
|
||||||
|
"name": "github_org",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"github_repo": {
|
||||||
|
"name": "github_repo",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"github_branch": {
|
||||||
|
"name": "github_branch",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"supabase_project_id": {
|
||||||
|
"name": "supabase_project_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"supabase_parent_project_id": {
|
||||||
|
"name": "supabase_parent_project_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"neon_project_id": {
|
||||||
|
"name": "neon_project_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"neon_development_branch_id": {
|
||||||
|
"name": "neon_development_branch_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"neon_preview_branch_id": {
|
||||||
|
"name": "neon_preview_branch_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"vercel_project_id": {
|
||||||
|
"name": "vercel_project_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"vercel_project_name": {
|
||||||
|
"name": "vercel_project_name",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"vercel_team_id": {
|
||||||
|
"name": "vercel_team_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"vercel_deployment_url": {
|
||||||
|
"name": "vercel_deployment_url",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"install_command": {
|
||||||
|
"name": "install_command",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"start_command": {
|
||||||
|
"name": "start_command",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"chat_context": {
|
||||||
|
"name": "chat_context",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"is_favorite": {
|
||||||
|
"name": "is_favorite",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {},
|
||||||
|
"foreignKeys": {},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {},
|
||||||
|
"checkConstraints": {}
|
||||||
|
},
|
||||||
|
"chats": {
|
||||||
|
"name": "chats",
|
||||||
|
"columns": {
|
||||||
|
"id": {
|
||||||
|
"name": "id",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": true
|
||||||
|
},
|
||||||
|
"app_id": {
|
||||||
|
"name": "app_id",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"title": {
|
||||||
|
"name": "title",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"initial_commit_hash": {
|
||||||
|
"name": "initial_commit_hash",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"name": "created_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "(unixepoch())"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {},
|
||||||
|
"foreignKeys": {
|
||||||
|
"chats_app_id_apps_id_fk": {
|
||||||
|
"name": "chats_app_id_apps_id_fk",
|
||||||
|
"tableFrom": "chats",
|
||||||
|
"tableTo": "apps",
|
||||||
|
"columnsFrom": [
|
||||||
|
"app_id"
|
||||||
|
],
|
||||||
|
"columnsTo": [
|
||||||
|
"id"
|
||||||
|
],
|
||||||
|
"onDelete": "cascade",
|
||||||
|
"onUpdate": "no action"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {},
|
||||||
|
"checkConstraints": {}
|
||||||
|
},
|
||||||
|
"language_model_providers": {
|
||||||
|
"name": "language_model_providers",
|
||||||
|
"columns": {
|
||||||
|
"id": {
|
||||||
|
"name": "id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"name": "name",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"api_base_url": {
|
||||||
|
"name": "api_base_url",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"env_var_name": {
|
||||||
|
"name": "env_var_name",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"name": "created_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "(unixepoch())"
|
||||||
|
},
|
||||||
|
"updated_at": {
|
||||||
|
"name": "updated_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "(unixepoch())"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {},
|
||||||
|
"foreignKeys": {},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {},
|
||||||
|
"checkConstraints": {}
|
||||||
|
},
|
||||||
|
"language_models": {
|
||||||
|
"name": "language_models",
|
||||||
|
"columns": {
|
||||||
|
"id": {
|
||||||
|
"name": "id",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": true
|
||||||
|
},
|
||||||
|
"display_name": {
|
||||||
|
"name": "display_name",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"api_name": {
|
||||||
|
"name": "api_name",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"builtin_provider_id": {
|
||||||
|
"name": "builtin_provider_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"custom_provider_id": {
|
||||||
|
"name": "custom_provider_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"description": {
|
||||||
|
"name": "description",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"max_output_tokens": {
|
||||||
|
"name": "max_output_tokens",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"context_window": {
|
||||||
|
"name": "context_window",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"name": "created_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "(unixepoch())"
|
||||||
|
},
|
||||||
|
"updated_at": {
|
||||||
|
"name": "updated_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "(unixepoch())"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {},
|
||||||
|
"foreignKeys": {
|
||||||
|
"language_models_custom_provider_id_language_model_providers_id_fk": {
|
||||||
|
"name": "language_models_custom_provider_id_language_model_providers_id_fk",
|
||||||
|
"tableFrom": "language_models",
|
||||||
|
"tableTo": "language_model_providers",
|
||||||
|
"columnsFrom": [
|
||||||
|
"custom_provider_id"
|
||||||
|
],
|
||||||
|
"columnsTo": [
|
||||||
|
"id"
|
||||||
|
],
|
||||||
|
"onDelete": "cascade",
|
||||||
|
"onUpdate": "no action"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {},
|
||||||
|
"checkConstraints": {}
|
||||||
|
},
|
||||||
|
"mcp_servers": {
|
||||||
|
"name": "mcp_servers",
|
||||||
|
"columns": {
|
||||||
|
"id": {
|
||||||
|
"name": "id",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": true
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"name": "name",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"transport": {
|
||||||
|
"name": "transport",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"command": {
|
||||||
|
"name": "command",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"args": {
|
||||||
|
"name": "args",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"env_json": {
|
||||||
|
"name": "env_json",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"name": "url",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"enabled": {
|
||||||
|
"name": "enabled",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "0"
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"name": "created_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "(unixepoch())"
|
||||||
|
},
|
||||||
|
"updated_at": {
|
||||||
|
"name": "updated_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "(unixepoch())"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {},
|
||||||
|
"foreignKeys": {},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {},
|
||||||
|
"checkConstraints": {}
|
||||||
|
},
|
||||||
|
"mcp_tool_consents": {
|
||||||
|
"name": "mcp_tool_consents",
|
||||||
|
"columns": {
|
||||||
|
"id": {
|
||||||
|
"name": "id",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": true
|
||||||
|
},
|
||||||
|
"server_id": {
|
||||||
|
"name": "server_id",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"tool_name": {
|
||||||
|
"name": "tool_name",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"consent": {
|
||||||
|
"name": "consent",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "'ask'"
|
||||||
|
},
|
||||||
|
"updated_at": {
|
||||||
|
"name": "updated_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "(unixepoch())"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {
|
||||||
|
"uniq_mcp_consent": {
|
||||||
|
"name": "uniq_mcp_consent",
|
||||||
|
"columns": [
|
||||||
|
"server_id",
|
||||||
|
"tool_name"
|
||||||
|
],
|
||||||
|
"isUnique": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"foreignKeys": {
|
||||||
|
"mcp_tool_consents_server_id_mcp_servers_id_fk": {
|
||||||
|
"name": "mcp_tool_consents_server_id_mcp_servers_id_fk",
|
||||||
|
"tableFrom": "mcp_tool_consents",
|
||||||
|
"tableTo": "mcp_servers",
|
||||||
|
"columnsFrom": [
|
||||||
|
"server_id"
|
||||||
|
],
|
||||||
|
"columnsTo": [
|
||||||
|
"id"
|
||||||
|
],
|
||||||
|
"onDelete": "cascade",
|
||||||
|
"onUpdate": "no action"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {},
|
||||||
|
"checkConstraints": {}
|
||||||
|
},
|
||||||
|
"messages": {
|
||||||
|
"name": "messages",
|
||||||
|
"columns": {
|
||||||
|
"id": {
|
||||||
|
"name": "id",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": true
|
||||||
|
},
|
||||||
|
"chat_id": {
|
||||||
|
"name": "chat_id",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"role": {
|
||||||
|
"name": "role",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"content": {
|
||||||
|
"name": "content",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"approval_state": {
|
||||||
|
"name": "approval_state",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"source_commit_hash": {
|
||||||
|
"name": "source_commit_hash",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"commit_hash": {
|
||||||
|
"name": "commit_hash",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"request_id": {
|
||||||
|
"name": "request_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"max_tokens_used": {
|
||||||
|
"name": "max_tokens_used",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"name": "created_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "(unixepoch())"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {},
|
||||||
|
"foreignKeys": {
|
||||||
|
"messages_chat_id_chats_id_fk": {
|
||||||
|
"name": "messages_chat_id_chats_id_fk",
|
||||||
|
"tableFrom": "messages",
|
||||||
|
"tableTo": "chats",
|
||||||
|
"columnsFrom": [
|
||||||
|
"chat_id"
|
||||||
|
],
|
||||||
|
"columnsTo": [
|
||||||
|
"id"
|
||||||
|
],
|
||||||
|
"onDelete": "cascade",
|
||||||
|
"onUpdate": "no action"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {},
|
||||||
|
"checkConstraints": {}
|
||||||
|
},
|
||||||
|
"prompts": {
|
||||||
|
"name": "prompts",
|
||||||
|
"columns": {
|
||||||
|
"id": {
|
||||||
|
"name": "id",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": true
|
||||||
|
},
|
||||||
|
"title": {
|
||||||
|
"name": "title",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"description": {
|
||||||
|
"name": "description",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"content": {
|
||||||
|
"name": "content",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"name": "created_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "(unixepoch())"
|
||||||
|
},
|
||||||
|
"updated_at": {
|
||||||
|
"name": "updated_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "(unixepoch())"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {},
|
||||||
|
"foreignKeys": {},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {},
|
||||||
|
"checkConstraints": {}
|
||||||
|
},
|
||||||
|
"versions": {
|
||||||
|
"name": "versions",
|
||||||
|
"columns": {
|
||||||
|
"id": {
|
||||||
|
"name": "id",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": true
|
||||||
|
},
|
||||||
|
"app_id": {
|
||||||
|
"name": "app_id",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"commit_hash": {
|
||||||
|
"name": "commit_hash",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"neon_db_timestamp": {
|
||||||
|
"name": "neon_db_timestamp",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"name": "created_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "(unixepoch())"
|
||||||
|
},
|
||||||
|
"updated_at": {
|
||||||
|
"name": "updated_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "(unixepoch())"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {
|
||||||
|
"versions_app_commit_unique": {
|
||||||
|
"name": "versions_app_commit_unique",
|
||||||
|
"columns": [
|
||||||
|
"app_id",
|
||||||
|
"commit_hash"
|
||||||
|
],
|
||||||
|
"isUnique": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"foreignKeys": {
|
||||||
|
"versions_app_id_apps_id_fk": {
|
||||||
|
"name": "versions_app_id_apps_id_fk",
|
||||||
|
"tableFrom": "versions",
|
||||||
|
"tableTo": "apps",
|
||||||
|
"columnsFrom": [
|
||||||
|
"app_id"
|
||||||
|
],
|
||||||
|
"columnsTo": [
|
||||||
|
"id"
|
||||||
|
],
|
||||||
|
"onDelete": "cascade",
|
||||||
|
"onUpdate": "no action"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {},
|
||||||
|
"checkConstraints": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"views": {},
|
||||||
|
"enums": {},
|
||||||
|
"_meta": {
|
||||||
|
"schemas": {},
|
||||||
|
"tables": {},
|
||||||
|
"columns": {}
|
||||||
|
},
|
||||||
|
"internal": {
|
||||||
|
"indexes": {}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -120,6 +120,13 @@
|
|||||||
"when": 1762297039106,
|
"when": 1762297039106,
|
||||||
"tag": "0016_petite_thanos",
|
"tag": "0016_petite_thanos",
|
||||||
"breakpoints": true
|
"breakpoints": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"idx": 17,
|
||||||
|
"version": "6",
|
||||||
|
"when": 1764804624402,
|
||||||
|
"tag": "0017_sharp_corsair",
|
||||||
|
"breakpoints": true
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
46
e2e-tests/context_limit_banner.spec.ts
Normal file
46
e2e-tests/context_limit_banner.spec.ts
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
import { test, Timeout } from "./helpers/test_helper";
|
||||||
|
import { expect } from "@playwright/test";
|
||||||
|
|
||||||
|
// E2E: after a response that reports high token usage, the context-limit
// banner must become visible with its warning text, and clicking its
// "Summarize into new chat" button must yield messages matching the snapshot.
// NOTE(review): the inline comments assume a 128k context window, while this
// commit's description sets the `auto` model's contextWindow to 200_000
// (110k used would then leave 90k, above the 40k threshold) - confirm which
// model/context window po.setUp() actually selects.
test("context limit banner appears and summarize works", async ({ po }) => {
|
||||||
|
await po.setUp();
|
||||||
|
|
||||||
|
// Send a message that triggers high token usage (110k tokens)
|
||||||
|
// With a default context window of 128k, this leaves only 18k tokens remaining
|
||||||
|
// which is below the 40k threshold to show the banner
|
||||||
|
await po.sendPrompt("tc=context-limit-response [high-tokens=110000]");
|
||||||
|
|
||||||
|
// Verify the context limit banner appears
|
||||||
|
const contextLimitBanner = po.page.getByTestId("context-limit-banner");
|
||||||
|
await expect(contextLimitBanner).toBeVisible({ timeout: Timeout.MEDIUM });
|
||||||
|
|
||||||
|
// Verify banner text
|
||||||
|
await expect(contextLimitBanner).toContainText(
|
||||||
|
"You're close to the context limit for this chat.",
|
||||||
|
);
|
||||||
|
|
||||||
|
// Click the summarize button
|
||||||
|
await contextLimitBanner
|
||||||
|
.getByRole("button", { name: "Summarize into new chat" })
|
||||||
|
.click();
|
||||||
|
|
||||||
|
// Wait for the new chat to load and message to complete
|
||||||
|
await po.waitForChatCompletion();
|
||||||
|
|
||||||
|
// Snapshot the messages in the new chat
|
||||||
|
await po.snapshotMessages();
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
// E2E: after a response that reports moderate token usage, the context-limit
// banner must not be shown.
test("context limit banner does not appear when within limit", async ({
|
||||||
|
po,
|
||||||
|
}) => {
|
||||||
|
await po.setUp();
|
||||||
|
|
||||||
|
// Send a message with low token usage (50k tokens)
|
||||||
|
// With a 128k context window, this leaves 78k tokens remaining
|
||||||
|
// which is above the 40k threshold - banner should NOT appear
|
||||||
|
await po.sendPrompt("tc=context-limit-response [high-tokens=50000]");
|
||||||
|
|
||||||
|
// Verify the context limit banner does NOT appear
|
||||||
|
const contextLimitBanner = po.page.getByTestId("context-limit-banner");
|
||||||
|
// NOTE(review): a negative visibility assertion passes as soon as the element
|
||||||
// is not visible, so it can succeed before the token count has been fetched
// and the banner has had a chance to render - confirm sendPrompt waits for the
// stream and token counting to settle, or add an explicit wait first.
await expect(contextLimitBanner).not.toBeVisible();
|
||||||
|
});
|
||||||
4
e2e-tests/fixtures/context-limit-response.md
Normal file
4
e2e-tests/fixtures/context-limit-response.md
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
Here is a simple response to test the context limit banner functionality.
|
||||||
|
|
||||||
|
This message simulates being close to the model's context window limit.
|
||||||
|
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
- paragraph: Summarize from chat-id=1
|
||||||
|
- img
|
||||||
|
- text: file1.txt
|
||||||
|
- button "Edit":
|
||||||
|
- img
|
||||||
|
- img
|
||||||
|
- text: file1.txt
|
||||||
|
- paragraph: More EOM
|
||||||
|
- button:
|
||||||
|
- img
|
||||||
|
- img
|
||||||
|
- text: less than a minute ago
|
||||||
|
- button "Retry":
|
||||||
|
- img
|
||||||
@@ -97,7 +97,11 @@ export function ChatPanel({
|
|||||||
const streamCount = chatId ? (streamCountById.get(chatId) ?? 0) : 0;
|
const streamCount = chatId ? (streamCountById.get(chatId) ?? 0) : 0;
|
||||||
console.log("streamCount - scrolling to bottom", streamCount);
|
console.log("streamCount - scrolling to bottom", streamCount);
|
||||||
scrollToBottom();
|
scrollToBottom();
|
||||||
}, [chatId, chatId ? (streamCountById.get(chatId) ?? 0) : 0]);
|
}, [
|
||||||
|
chatId,
|
||||||
|
chatId ? (streamCountById.get(chatId) ?? 0) : 0,
|
||||||
|
chatId ? (isStreamingById.get(chatId) ?? false) : false,
|
||||||
|
]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
const container = messagesContainerRef.current;
|
const container = messagesContainerRef.current;
|
||||||
|
|||||||
@@ -27,11 +27,17 @@ import { useSettings } from "@/hooks/useSettings";
|
|||||||
import { PriceBadge } from "@/components/PriceBadge";
|
import { PriceBadge } from "@/components/PriceBadge";
|
||||||
import { TURBO_MODELS } from "@/ipc/shared/language_model_constants";
|
import { TURBO_MODELS } from "@/ipc/shared/language_model_constants";
|
||||||
import { cn } from "@/lib/utils";
|
import { cn } from "@/lib/utils";
|
||||||
|
import { useQueryClient } from "@tanstack/react-query";
|
||||||
|
import { TOKEN_COUNT_QUERY_KEY } from "@/hooks/useCountTokens";
|
||||||
|
|
||||||
export function ModelPicker() {
|
export function ModelPicker() {
|
||||||
const { settings, updateSettings } = useSettings();
|
const { settings, updateSettings } = useSettings();
|
||||||
|
const queryClient = useQueryClient();
|
||||||
const onModelSelect = (model: LargeLanguageModel) => {
|
const onModelSelect = (model: LargeLanguageModel) => {
|
||||||
updateSettings({ selectedModel: model });
|
updateSettings({ selectedModel: model });
|
||||||
|
// Invalidate token count when model changes since different models have different context windows
|
||||||
|
// (technically they have different tokenizers, but we don't keep track of that).
|
||||||
|
queryClient.invalidateQueries({ queryKey: TOKEN_COUNT_QUERY_KEY });
|
||||||
};
|
};
|
||||||
|
|
||||||
const [open, setOpen] = useState(false);
|
const [open, setOpen] = useState(false);
|
||||||
|
|||||||
@@ -52,13 +52,14 @@ import {
|
|||||||
TooltipProvider,
|
TooltipProvider,
|
||||||
TooltipTrigger,
|
TooltipTrigger,
|
||||||
} from "../ui/tooltip";
|
} from "../ui/tooltip";
|
||||||
import { useNavigate } from "@tanstack/react-router";
|
|
||||||
import { useVersions } from "@/hooks/useVersions";
|
import { useVersions } from "@/hooks/useVersions";
|
||||||
import { useAttachments } from "@/hooks/useAttachments";
|
import { useAttachments } from "@/hooks/useAttachments";
|
||||||
import { AttachmentsList } from "./AttachmentsList";
|
import { AttachmentsList } from "./AttachmentsList";
|
||||||
import { DragDropOverlay } from "./DragDropOverlay";
|
import { DragDropOverlay } from "./DragDropOverlay";
|
||||||
import { FileAttachmentDropdown } from "./FileAttachmentDropdown";
|
import { FileAttachmentDropdown } from "./FileAttachmentDropdown";
|
||||||
import { showError, showExtraFilesToast } from "@/lib/toast";
|
import { showExtraFilesToast } from "@/lib/toast";
|
||||||
|
import { useSummarizeInNewChat } from "./SummarizeInNewChatButton";
|
||||||
import { ChatInputControls } from "../ChatInputControls";
|
import { ChatInputControls } from "../ChatInputControls";
|
||||||
import { ChatErrorBox } from "./ChatErrorBox";
|
import { ChatErrorBox } from "./ChatErrorBox";
|
||||||
import {
|
import {
|
||||||
@@ -419,30 +420,10 @@ function SuggestionButton({
|
|||||||
}
|
}
|
||||||
|
|
||||||
function SummarizeInNewChatButton() {
|
function SummarizeInNewChatButton() {
|
||||||
const chatId = useAtomValue(selectedChatIdAtom);
|
const { handleSummarize } = useSummarizeInNewChat();
|
||||||
const appId = useAtomValue(selectedAppIdAtom);
|
|
||||||
const { streamMessage } = useStreamChat();
|
|
||||||
const navigate = useNavigate();
|
|
||||||
const onClick = async () => {
|
|
||||||
if (!appId) {
|
|
||||||
console.error("No app id found");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
const newChatId = await IpcClient.getInstance().createChat(appId);
|
|
||||||
// navigate to new chat
|
|
||||||
await navigate({ to: "/chat", search: { id: newChatId } });
|
|
||||||
await streamMessage({
|
|
||||||
prompt: "Summarize from chat-id=" + chatId,
|
|
||||||
chatId: newChatId,
|
|
||||||
});
|
|
||||||
} catch (err) {
|
|
||||||
showError(err);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
return (
|
return (
|
||||||
<SuggestionButton
|
<SuggestionButton
|
||||||
onClick={onClick}
|
onClick={handleSummarize}
|
||||||
tooltipText="Creating a new chat makes the AI more focused and efficient"
|
tooltipText="Creating a new chat makes the AI more focused and efficient"
|
||||||
>
|
>
|
||||||
Summarize to new chat
|
Summarize to new chat
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import {
|
|||||||
GitCommit,
|
GitCommit,
|
||||||
Copy,
|
Copy,
|
||||||
Check,
|
Check,
|
||||||
|
Info,
|
||||||
} from "lucide-react";
|
} from "lucide-react";
|
||||||
import { formatDistanceToNow, format } from "date-fns";
|
import { formatDistanceToNow, format } from "date-fns";
|
||||||
import { useVersions } from "@/hooks/useVersions";
|
import { useVersions } from "@/hooks/useVersions";
|
||||||
@@ -277,6 +278,20 @@ const ChatMessage = ({ message, isLastMessage }: ChatMessageProps) => {
|
|||||||
</Tooltip>
|
</Tooltip>
|
||||||
</TooltipProvider>
|
</TooltipProvider>
|
||||||
)}
|
)}
|
||||||
|
{isLastMessage && message.totalTokens && (
|
||||||
|
<TooltipProvider>
|
||||||
|
<Tooltip>
|
||||||
|
<TooltipTrigger asChild>
|
||||||
|
<div className="flex items-center space-x-1 px-1 py-0.5">
|
||||||
|
<Info className="h-3 w-3" />
|
||||||
|
</div>
|
||||||
|
</TooltipTrigger>
|
||||||
|
<TooltipContent>
|
||||||
|
Max tokens used: {message.totalTokens.toLocaleString()}
|
||||||
|
</TooltipContent>
|
||||||
|
</Tooltip>
|
||||||
|
</TooltipProvider>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
89
src/components/chat/ContextLimitBanner.tsx
Normal file
89
src/components/chat/ContextLimitBanner.tsx
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
import { AlertTriangle, ArrowRight } from "lucide-react";
|
||||||
|
import { Button } from "@/components/ui/button";
|
||||||
|
import {
|
||||||
|
Tooltip,
|
||||||
|
TooltipContent,
|
||||||
|
TooltipTrigger,
|
||||||
|
} from "@/components/ui/tooltip";
|
||||||
|
import { useSummarizeInNewChat } from "./SummarizeInNewChatButton";
|
||||||
|
|
||||||
|
const CONTEXT_LIMIT_THRESHOLD = 40_000;
|
||||||
|
|
||||||
|
/** Props accepted by the ContextLimitBanner component. */
interface ContextLimitBannerProps {
  /**
   * Tokens already consumed by the chat. The banner is not rendered when
   * this is missing, null, or 0.
   */
  totalTokens?: number | null;
  /**
   * Context window size, in tokens, to compare against. The banner is not
   * rendered when this is missing or 0.
   */
  contextWindow?: number;
}
|
||||||
|
|
||||||
|
/**
 * Formats a token count as a compact, human-readable string.
 *
 * - Counts below 1000 are returned unchanged (e.g. `999` -> `"999"`).
 * - Counts from 1000 up are shown in thousands with at most one decimal
 *   place, dropping a trailing `.0` (e.g. `1500` -> `"1.5k"`, `200000` -> `"200k"`).
 * - Counts that reach (or round up to) one million are shown in millions
 *   instead of as an awkward `"1000k"` (e.g. `1000000` -> `"1M"`).
 *
 * @param count Number of tokens to format.
 * @returns The compact display string.
 */
function formatTokenCount(count: number): string {
  if (count < 1000) {
    return count.toString();
  }

  let scaled = count / 1000;
  let suffix = "k";

  // toFixed(1) rounds 999.95 and above up to "1000.0", so the tier decision
  // has to be made on the rounded value rather than on the raw count.
  if (Number(scaled.toFixed(1)) >= 1000) {
    scaled = count / 1_000_000;
    suffix = "M";
  }

  const digits = scaled.toFixed(1);
  // Drop a redundant ".0" so whole values read as "200k" rather than "200.0k".
  const trimmed = digits.endsWith(".0") ? digits.slice(0, -2) : digits;
  return `${trimmed}${suffix}`;
}
|
||||||
|
|
||||||
|
/**
 * Warning banner for chats that are running close to the context window.
 *
 * Renders nothing when either `totalTokens` or `contextWindow` is missing
 * (or zero), or when more than CONTEXT_LIMIT_THRESHOLD tokens are still
 * available. Otherwise it shows a warning with a tooltip listing the used
 * and limit token counts, plus a button that invokes `handleSummarize`
 * from `useSummarizeInNewChat`.
 */
export function ContextLimitBanner(props: ContextLimitBannerProps) {
  const { totalTokens, contextWindow } = props;

  // Hooks must run on every render, so this stays above the early returns.
  const { handleSummarize } = useSummarizeInNewChat();

  // Without both numbers there is nothing meaningful to compare.
  if (!totalTokens || !contextWindow) return null;

  // Only warn once the remaining headroom has dropped to the threshold or below.
  const hasHeadroom = contextWindow - totalTokens > CONTEXT_LIMIT_THRESHOLD;
  if (hasHeadroom) return null;

  // Rows displayed inside the tooltip, in display order.
  const usageRows = [
    { label: "Used:", value: totalTokens },
    { label: "Limit:", value: contextWindow },
  ];

  return (
    <div
      data-testid="context-limit-banner"
      className="mx-auto max-w-3xl my-3 p-2 rounded-lg border border-amber-500/30 bg-amber-500/10 flex flex-col gap-2"
    >
      <div className="flex items-center gap-2 text-amber-600 dark:text-amber-400">
        <Tooltip>
          <TooltipTrigger asChild>
            <Button
              size="icon"
              variant="ghost"
              className="h-5 w-5 p-0 hover:bg-transparent text-amber-600 dark:text-amber-400 cursor-help"
            >
              <AlertTriangle className="h-4 w-4 shrink-0" />
            </Button>
          </TooltipTrigger>
          <TooltipContent side="top" className="w-auto p-2 text-xs">
            <div className="grid gap-1">
              {usageRows.map(({ label, value }) => (
                <div key={label} className="flex justify-between gap-4">
                  <span>{label}</span>
                  <span className="font-medium">{formatTokenCount(value)}</span>
                </div>
              ))}
            </div>
          </TooltipContent>
        </Tooltip>
        <p className="text-sm font-medium">
          You're close to the context limit for this chat.
        </p>
      </div>
      <Button
        size="sm"
        variant="outline"
        onClick={handleSummarize}
        className="h-8 border-amber-500/50 hover:bg-amber-500/20 hover:border-amber-500 text-amber-600 dark:text-amber-400"
      >
        Summarize into new chat
        <ArrowRight className="h-3 w-3 ml-2" />
      </Button>
    </div>
  );
}
|
||||||
@@ -18,6 +18,8 @@ import { useLanguageModelProviders } from "@/hooks/useLanguageModelProviders";
|
|||||||
import { useSettings } from "@/hooks/useSettings";
|
import { useSettings } from "@/hooks/useSettings";
|
||||||
import { useUserBudgetInfo } from "@/hooks/useUserBudgetInfo";
|
import { useUserBudgetInfo } from "@/hooks/useUserBudgetInfo";
|
||||||
import { PromoMessage } from "./PromoMessage";
|
import { PromoMessage } from "./PromoMessage";
|
||||||
|
import { ContextLimitBanner } from "./ContextLimitBanner";
|
||||||
|
import { useCountTokens } from "@/hooks/useCountTokens";
|
||||||
|
|
||||||
interface MessagesListProps {
|
interface MessagesListProps {
|
||||||
messages: Message[];
|
messages: Message[];
|
||||||
@@ -36,6 +38,11 @@ export const MessagesList = forwardRef<HTMLDivElement, MessagesListProps>(
|
|||||||
const [isRetryLoading, setIsRetryLoading] = useState(false);
|
const [isRetryLoading, setIsRetryLoading] = useState(false);
|
||||||
const selectedChatId = useAtomValue(selectedChatIdAtom);
|
const selectedChatId = useAtomValue(selectedChatIdAtom);
|
||||||
const { userBudget } = useUserBudgetInfo();
|
const { userBudget } = useUserBudgetInfo();
|
||||||
|
// Only fetch token count when not streaming
|
||||||
|
const { result: tokenCountResult } = useCountTokens(
|
||||||
|
!isStreaming ? selectedChatId : null,
|
||||||
|
"",
|
||||||
|
);
|
||||||
|
|
||||||
const renderSetupBanner = () => {
|
const renderSetupBanner = () => {
|
||||||
const selectedModel = settings?.selectedModel;
|
const selectedModel = settings?.selectedModel;
|
||||||
@@ -73,6 +80,13 @@ export const MessagesList = forwardRef<HTMLDivElement, MessagesListProps>(
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
{/* Show context limit banner when close to token limit */}
|
||||||
|
{!isStreaming && tokenCountResult && (
|
||||||
|
<ContextLimitBanner
|
||||||
|
totalTokens={tokenCountResult.actualMaxTokens}
|
||||||
|
contextWindow={tokenCountResult.contextWindow}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
{!isStreaming && (
|
{!isStreaming && (
|
||||||
<div className="flex max-w-3xl mx-auto gap-2">
|
<div className="flex max-w-3xl mx-auto gap-2">
|
||||||
{!!messages.length &&
|
{!!messages.length &&
|
||||||
|
|||||||
38
src/components/chat/SummarizeInNewChatButton.tsx
Normal file
38
src/components/chat/SummarizeInNewChatButton.tsx
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
import { useNavigate } from "@tanstack/react-router";
|
||||||
|
import { useAtomValue } from "jotai";
|
||||||
|
import { selectedChatIdAtom } from "@/atoms/chatAtoms";
|
||||||
|
import { selectedAppIdAtom } from "@/atoms/appAtoms";
|
||||||
|
import { useStreamChat } from "@/hooks/useStreamChat";
|
||||||
|
import { IpcClient } from "@/ipc/ipc_client";
|
||||||
|
import { showError } from "@/lib/toast";
|
||||||
|
|
||||||
|
/**
 * Hook exposing `handleSummarize`, which starts a new chat seeded with a
 * summary request for the currently selected chat.
 *
 * `handleSummarize` logs an error and does nothing when no app or chat is
 * selected. Otherwise it creates a chat for the selected app, navigates to
 * it, and streams a "Summarize from chat-id=<id>" prompt into it. Any error
 * thrown along the way is reported through `showError`.
 */
export function useSummarizeInNewChat() {
  const navigate = useNavigate();
  const { streamMessage } = useStreamChat();
  const appId = useAtomValue(selectedAppIdAtom);
  const chatId = useAtomValue(selectedChatIdAtom);

  async function handleSummarize() {
    // Both ids are required: the app owns the new chat, the chat is the source.
    if (!appId) {
      console.error("No app id found");
      return;
    }
    if (!chatId) {
      console.error("No chat id found");
      return;
    }

    try {
      const summaryChatId = await IpcClient.getInstance().createChat(appId);
      // Switch to the new chat before streaming the summary request into it.
      await navigate({ to: "/chat", search: { id: summaryChatId } });
      await streamMessage({
        prompt: `Summarize from chat-id=${chatId}`,
        chatId: summaryChatId,
      });
    } catch (err) {
      showError(err);
    }
  }

  return { handleSummarize };
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
import React, { useEffect, useState } from "react";
|
import React from "react";
|
||||||
import {
|
import {
|
||||||
Tooltip,
|
Tooltip,
|
||||||
TooltipContent,
|
TooltipContent,
|
||||||
@@ -24,31 +24,15 @@ interface TokenBarProps {
|
|||||||
|
|
||||||
export function TokenBar({ chatId }: TokenBarProps) {
|
export function TokenBar({ chatId }: TokenBarProps) {
|
||||||
const [inputValue] = useAtom(chatInputValueAtom);
|
const [inputValue] = useAtom(chatInputValueAtom);
|
||||||
const { countTokens, result } = useCountTokens();
|
|
||||||
const [error, setError] = useState<string | null>(null);
|
|
||||||
const { settings } = useSettings();
|
const { settings } = useSettings();
|
||||||
useEffect(() => {
|
const { result, error } = useCountTokens(chatId ?? null, inputValue);
|
||||||
if (!chatId) return;
|
|
||||||
// Mark this as used, we need to re-trigger token count
|
|
||||||
// when selected model changes.
|
|
||||||
void settings?.selectedModel;
|
|
||||||
|
|
||||||
const debounceTimer = setTimeout(() => {
|
|
||||||
countTokens(chatId, inputValue).catch((err) => {
|
|
||||||
setError("Failed to count tokens");
|
|
||||||
console.error("Token counting error:", err);
|
|
||||||
});
|
|
||||||
}, 500);
|
|
||||||
|
|
||||||
return () => clearTimeout(debounceTimer);
|
|
||||||
}, [chatId, inputValue, countTokens, settings?.selectedModel]);
|
|
||||||
|
|
||||||
if (!chatId || !result) {
|
if (!chatId || !result) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
const {
|
const {
|
||||||
totalTokens,
|
estimatedTotalTokens: totalTokens,
|
||||||
messageHistoryTokens,
|
messageHistoryTokens,
|
||||||
codebaseTokens,
|
codebaseTokens,
|
||||||
mentionedAppsTokens,
|
mentionedAppsTokens,
|
||||||
@@ -142,7 +126,9 @@ export function TokenBar({ chatId }: TokenBarProps) {
|
|||||||
</TooltipContent>
|
</TooltipContent>
|
||||||
</Tooltip>
|
</Tooltip>
|
||||||
</TooltipProvider>
|
</TooltipProvider>
|
||||||
{error && <div className="text-red-500 text-xs mt-1">{error}</div>}
|
{error && (
|
||||||
|
<div className="text-red-500 text-xs mt-1">Failed to count tokens</div>
|
||||||
|
)}
|
||||||
{(!settings?.enableProSmartFilesContextMode ||
|
{(!settings?.enableProSmartFilesContextMode ||
|
||||||
!settings?.enableDyadPro) && (
|
!settings?.enableDyadPro) && (
|
||||||
<div className="text-xs text-center text-muted-foreground mt-2">
|
<div className="text-xs text-center text-muted-foreground mt-2">
|
||||||
|
|||||||
@@ -77,6 +77,8 @@ export const messages = sqliteTable("messages", {
|
|||||||
// The commit hash of the codebase at the time the message was sent
|
// The commit hash of the codebase at the time the message was sent
|
||||||
commitHash: text("commit_hash"),
|
commitHash: text("commit_hash"),
|
||||||
requestId: text("request_id"),
|
requestId: text("request_id"),
|
||||||
|
// Max tokens used for this message (only for assistant messages)
|
||||||
|
maxTokensUsed: integer("max_tokens_used"),
|
||||||
createdAt: integer("created_at", { mode: "timestamp" })
|
createdAt: integer("created_at", { mode: "timestamp" })
|
||||||
.notNull()
|
.notNull()
|
||||||
.default(sql`(unixepoch())`),
|
.default(sql`(unixepoch())`),
|
||||||
|
|||||||
@@ -1,43 +1,62 @@
|
|||||||
import { useCallback } from "react";
|
import {
|
||||||
import { atom, useAtom } from "jotai";
|
keepPreviousData,
|
||||||
|
useQuery,
|
||||||
|
useQueryClient,
|
||||||
|
} from "@tanstack/react-query";
|
||||||
import { IpcClient } from "@/ipc/ipc_client";
|
import { IpcClient } from "@/ipc/ipc_client";
|
||||||
import type { TokenCountResult } from "@/ipc/ipc_types";
|
import type { TokenCountResult } from "@/ipc/ipc_types";
|
||||||
|
import { useCallback, useEffect, useState } from "react";
|
||||||
|
|
||||||
// Create atoms to store the token count state
|
export const TOKEN_COUNT_QUERY_KEY = ["tokenCount"] as const;
|
||||||
export const tokenCountResultAtom = atom<TokenCountResult | null>(null);
|
|
||||||
export const tokenCountLoadingAtom = atom<boolean>(false);
|
|
||||||
export const tokenCountErrorAtom = atom<Error | null>(null);
|
|
||||||
|
|
||||||
export function useCountTokens() {
|
export function useCountTokens(chatId: number | null, input: string = "") {
|
||||||
const [result, setResult] = useAtom(tokenCountResultAtom);
|
const queryClient = useQueryClient();
|
||||||
const [loading, setLoading] = useAtom(tokenCountLoadingAtom);
|
|
||||||
const [error, setError] = useAtom(tokenCountErrorAtom);
|
|
||||||
|
|
||||||
const countTokens = useCallback(
|
// Debounce input so we don't call the token counting IPC on every keystroke.
|
||||||
async (chatId: number, input: string) => {
|
const [debouncedInput, setDebouncedInput] = useState(input);
|
||||||
setLoading(true);
|
|
||||||
setError(null);
|
|
||||||
|
|
||||||
try {
|
useEffect(() => {
|
||||||
const ipcClient = IpcClient.getInstance();
|
// If there's no chat, don't bother debouncing
|
||||||
const tokenResult = await ipcClient.countTokens({ chatId, input });
|
if (chatId === null) {
|
||||||
setResult(tokenResult);
|
setDebouncedInput(input);
|
||||||
return tokenResult;
|
return;
|
||||||
} catch (error) {
|
}
|
||||||
console.error("Error counting tokens:", error);
|
|
||||||
setError(error instanceof Error ? error : new Error(String(error)));
|
const handle = setTimeout(() => {
|
||||||
throw error;
|
setDebouncedInput(input);
|
||||||
} finally {
|
}, 1_000);
|
||||||
setLoading(false);
|
|
||||||
}
|
return () => clearTimeout(handle);
|
||||||
|
}, [chatId, input]);
|
||||||
|
|
||||||
|
const {
|
||||||
|
data: result = null,
|
||||||
|
isLoading: loading,
|
||||||
|
error,
|
||||||
|
refetch,
|
||||||
|
} = useQuery<TokenCountResult | null>({
|
||||||
|
queryKey: [...TOKEN_COUNT_QUERY_KEY, chatId, debouncedInput],
|
||||||
|
queryFn: async () => {
|
||||||
|
if (chatId === null) return null;
|
||||||
|
return IpcClient.getInstance().countTokens({
|
||||||
|
chatId,
|
||||||
|
input: debouncedInput,
|
||||||
|
});
|
||||||
},
|
},
|
||||||
[setLoading, setError, setResult],
|
placeholderData: keepPreviousData,
|
||||||
);
|
enabled: chatId !== null,
|
||||||
|
});
|
||||||
|
|
||||||
|
// For imperative invalidation (e.g., after streaming completes)
|
||||||
|
const invalidateTokenCount = useCallback(() => {
|
||||||
|
queryClient.invalidateQueries({ queryKey: TOKEN_COUNT_QUERY_KEY });
|
||||||
|
}, [queryClient]);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
countTokens,
|
|
||||||
result,
|
result,
|
||||||
loading,
|
loading,
|
||||||
error,
|
error,
|
||||||
|
refetch,
|
||||||
|
invalidateTokenCount,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -49,7 +49,6 @@ export function useStreamChat({
|
|||||||
const setStreamCountById = useSetAtom(chatStreamCountByIdAtom);
|
const setStreamCountById = useSetAtom(chatStreamCountByIdAtom);
|
||||||
const { refreshVersions } = useVersions(selectedAppId);
|
const { refreshVersions } = useVersions(selectedAppId);
|
||||||
const { refreshAppIframe } = useRunApp();
|
const { refreshAppIframe } = useRunApp();
|
||||||
const { countTokens } = useCountTokens();
|
|
||||||
const { refetchUserBudget } = useUserBudgetInfo();
|
const { refetchUserBudget } = useUserBudgetInfo();
|
||||||
const { checkProblems } = useCheckProblems(selectedAppId);
|
const { checkProblems } = useCheckProblems(selectedAppId);
|
||||||
const { settings } = useSettings();
|
const { settings } = useSettings();
|
||||||
@@ -62,6 +61,7 @@ export function useStreamChat({
|
|||||||
chatId = id;
|
chatId = id;
|
||||||
}
|
}
|
||||||
let { refreshProposal } = hasChatId ? useProposal(chatId) : useProposal();
|
let { refreshProposal } = hasChatId ? useProposal(chatId) : useProposal();
|
||||||
|
const { invalidateTokenCount } = useCountTokens(chatId ?? null, "");
|
||||||
|
|
||||||
const streamMessage = useCallback(
|
const streamMessage = useCallback(
|
||||||
async ({
|
async ({
|
||||||
@@ -154,7 +154,7 @@ export function useStreamChat({
|
|||||||
refreshChats();
|
refreshChats();
|
||||||
refreshApp();
|
refreshApp();
|
||||||
refreshVersions();
|
refreshVersions();
|
||||||
countTokens(chatId, "");
|
invalidateTokenCount();
|
||||||
onSettled?.();
|
onSettled?.();
|
||||||
},
|
},
|
||||||
onError: (errorMessage: string) => {
|
onError: (errorMessage: string) => {
|
||||||
@@ -174,7 +174,7 @@ export function useStreamChat({
|
|||||||
refreshChats();
|
refreshChats();
|
||||||
refreshApp();
|
refreshApp();
|
||||||
refreshVersions();
|
refreshVersions();
|
||||||
countTokens(chatId, "");
|
invalidateTokenCount();
|
||||||
onSettled?.();
|
onSettled?.();
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -448,6 +448,7 @@ ${componentSnippet}
|
|||||||
});
|
});
|
||||||
|
|
||||||
let fullResponse = "";
|
let fullResponse = "";
|
||||||
|
let maxTokensUsed: number | undefined;
|
||||||
|
|
||||||
// Check if this is a test prompt
|
// Check if this is a test prompt
|
||||||
const testResponse = getTestResponse(req.prompt);
|
const testResponse = getTestResponse(req.prompt);
|
||||||
@@ -885,7 +886,7 @@ This conversation includes one or more image attachments. When the user uploads
|
|||||||
} satisfies GoogleGenerativeAIProviderOptions;
|
} satisfies GoogleGenerativeAIProviderOptions;
|
||||||
}
|
}
|
||||||
|
|
||||||
return streamText({
|
const streamResult = streamText({
|
||||||
headers: isAnthropic
|
headers: isAnthropic
|
||||||
? {
|
? {
|
||||||
"anthropic-beta": "context-1m-2025-08-07",
|
"anthropic-beta": "context-1m-2025-08-07",
|
||||||
@@ -900,6 +901,33 @@ This conversation includes one or more image attachments. When the user uploads
|
|||||||
system: systemPromptOverride,
|
system: systemPromptOverride,
|
||||||
tools,
|
tools,
|
||||||
messages: chatMessages.filter((m) => m.content),
|
messages: chatMessages.filter((m) => m.content),
|
||||||
|
onFinish: (response) => {
|
||||||
|
const totalTokens = response.usage?.totalTokens;
|
||||||
|
|
||||||
|
if (typeof totalTokens === "number") {
|
||||||
|
// We use the highest total tokens used (we are *not* accumulating)
|
||||||
|
// since we're trying to figure out whether we're near the context limit.
|
||||||
|
maxTokensUsed = Math.max(maxTokensUsed ?? 0, totalTokens);
|
||||||
|
|
||||||
|
// Persist the max token usage (not a running sum) on the placeholder assistant message
|
||||||
|
void db
|
||||||
|
.update(messages)
|
||||||
|
.set({ maxTokensUsed: maxTokensUsed })
|
||||||
|
.where(eq(messages.id, placeholderAssistantMessage.id))
|
||||||
|
.catch((error) => {
|
||||||
|
logger.error(
|
||||||
|
"Failed to save total tokens for assistant message",
|
||||||
|
error,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
logger.log(
|
||||||
|
`Total tokens used (aggregated for message ${placeholderAssistantMessage.id}): ${maxTokensUsed}`,
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
logger.log("Total tokens used: unknown");
|
||||||
|
}
|
||||||
|
},
|
||||||
onError: (error: any) => {
|
onError: (error: any) => {
|
||||||
let errorMessage = (error as any)?.error?.message;
|
let errorMessage = (error as any)?.error?.message;
|
||||||
const responseBody = error?.error?.responseBody;
|
const responseBody = error?.error?.responseBody;
|
||||||
@@ -923,6 +951,10 @@ This conversation includes one or more image attachments. When the user uploads
|
|||||||
},
|
},
|
||||||
abortSignal: abortController.signal,
|
abortSignal: abortController.signal,
|
||||||
});
|
});
|
||||||
|
return {
|
||||||
|
fullStream: streamResult.fullStream,
|
||||||
|
usage: streamResult.usage,
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
let lastDbSaveAt = 0;
|
let lastDbSaveAt = 0;
|
||||||
|
|||||||
@@ -141,8 +141,15 @@ export function registerTokenCountHandlers() {
|
|||||||
codebaseTokens +
|
codebaseTokens +
|
||||||
mentionedAppsTokens;
|
mentionedAppsTokens;
|
||||||
|
|
||||||
|
// Find the last assistant message since maxTokensUsed is only set on assistant messages
|
||||||
|
const lastAssistantMessage = [...chat.messages]
|
||||||
|
.reverse()
|
||||||
|
.find((m) => m.role === "assistant");
|
||||||
|
const actualMaxTokens = lastAssistantMessage?.maxTokensUsed ?? null;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
totalTokens,
|
estimatedTotalTokens: totalTokens,
|
||||||
|
actualMaxTokens,
|
||||||
messageHistoryTokens,
|
messageHistoryTokens,
|
||||||
codebaseTokens,
|
codebaseTokens,
|
||||||
mentionedAppsTokens,
|
mentionedAppsTokens,
|
||||||
|
|||||||
@@ -49,6 +49,8 @@ export interface ChatResponseEnd {
|
|||||||
updatedFiles: boolean;
|
updatedFiles: boolean;
|
||||||
extraFiles?: string[];
|
extraFiles?: string[];
|
||||||
extraFilesError?: string;
|
extraFilesError?: string;
|
||||||
|
totalTokens?: number;
|
||||||
|
contextWindow?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ChatProblemsEvent {
|
export interface ChatProblemsEvent {
|
||||||
@@ -81,6 +83,7 @@ export interface Message {
|
|||||||
dbTimestamp?: string | null;
|
dbTimestamp?: string | null;
|
||||||
createdAt?: Date | string;
|
createdAt?: Date | string;
|
||||||
requestId?: string | null;
|
requestId?: string | null;
|
||||||
|
totalTokens?: number | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface Chat {
|
export interface Chat {
|
||||||
@@ -167,7 +170,8 @@ export interface TokenCountParams {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export interface TokenCountResult {
|
export interface TokenCountResult {
|
||||||
totalTokens: number;
|
estimatedTotalTokens: number;
|
||||||
|
actualMaxTokens: number | null;
|
||||||
messageHistoryTokens: number;
|
messageHistoryTokens: number;
|
||||||
codebaseTokens: number;
|
codebaseTokens: number;
|
||||||
mentionedAppsTokens: number;
|
mentionedAppsTokens: number;
|
||||||
|
|||||||
@@ -306,11 +306,9 @@ export const MODEL_OPTIONS: Record<string, ModelOption[]> = {
|
|||||||
displayName: "Auto",
|
displayName: "Auto",
|
||||||
description: "Automatically selects the best model",
|
description: "Automatically selects the best model",
|
||||||
tag: "Default",
|
tag: "Default",
|
||||||
// These are below Gemini 2.5 Pro & Flash limits
|
// The following are reasonable defaults.
|
||||||
// which are the ones defaulted to for both regular auto
|
|
||||||
// and smart auto.
|
|
||||||
maxOutputTokens: 32_000,
|
maxOutputTokens: 32_000,
|
||||||
contextWindow: 1_000_000,
|
contextWindow: 200_000,
|
||||||
temperature: 0,
|
temperature: 0,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,8 +1,42 @@
|
|||||||
export const SUMMARIZE_CHAT_SYSTEM_PROMPT = `
|
export const SUMMARIZE_CHAT_SYSTEM_PROMPT = `
|
||||||
You are a helpful assistant that understands long conversations and can summarize them in a few bullet points.
|
You are a helpful assistant that summarizes AI coding chat sessions with a focus on technical changes and file modifications.
|
||||||
|
|
||||||
I want you to write down the gist of the conversation in a few bullet points, focusing on the major changes, particularly
|
Your task is to analyze the conversation and provide:
|
||||||
at the end of the conversation.
|
|
||||||
|
|
||||||
Use <dyad-chat-summary> for setting the chat summary (put this at the end). The chat summary should be less than a sentence, but more than a few words. YOU SHOULD ALWAYS INCLUDE EXACTLY ONE CHAT TITLE
|
1. **Chat Summary**: A concise summary (less than a sentence, more than a few words) that captures the primary objective or outcome of the session.
|
||||||
|
|
||||||
|
2. **Major Changes**: Identify and highlight:
|
||||||
|
- Major code modifications, refactors, or new features implemented
|
||||||
|
- Critical bug fixes or debugging sessions
|
||||||
|
- Architecture or design pattern changes
|
||||||
|
- Important decisions made during the conversation
|
||||||
|
|
||||||
|
3. **Relevant Files**: List the most important files discussed or modified, with brief context:
|
||||||
|
- Files that received significant changes
|
||||||
|
- New files created
|
||||||
|
- Files central to the discussion or problem-solving
|
||||||
|
- Format: \`path/to/file.ext - brief description of changes\`
|
||||||
|
|
||||||
|
4. **Focus on Recency**: Prioritize changes and discussions from the latter part of the conversation, as these typically represent the final state or most recent decisions.
|
||||||
|
|
||||||
|
**Output Format:**
|
||||||
|
|
||||||
|
## Major Changes
|
||||||
|
- Bullet point of significant change 1
|
||||||
|
- Bullet point of significant change 2
|
||||||
|
|
||||||
|
## Important Context
|
||||||
|
- Any critical decisions, trade-offs, or next steps discussed
|
||||||
|
|
||||||
|
## Relevant Files
|
||||||
|
- \`file1.ts\` - Description of changes
|
||||||
|
- \`file2.py\` - Description of changes
|
||||||
|
|
||||||
|
<dyad-chat-summary>
|
||||||
|
[Your concise summary here - less than a sentence, more than a few words]
|
||||||
|
</dyad-chat-summary>
|
||||||
|
|
||||||
|
**Reminder:**
|
||||||
|
|
||||||
|
YOU MUST ALWAYS INCLUDE EXACTLY ONE <dyad-chat-summary> TAG AT THE END.
|
||||||
`;
|
`;
|
||||||
|
|||||||
@@ -371,6 +371,15 @@ export default Index;
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check for high token usage marker to simulate near context limit
|
||||||
|
const highTokensMatch =
|
||||||
|
typeof lastMessage?.content === "string" &&
|
||||||
|
!lastMessage?.content.startsWith("Summarize the following chat:") &&
|
||||||
|
lastMessage?.content?.match?.(/\[high-tokens=(\d+)\]/);
|
||||||
|
const highTokensValue = highTokensMatch
|
||||||
|
? parseInt(highTokensMatch[1], 10)
|
||||||
|
: null;
|
||||||
|
|
||||||
// Split the message into characters to simulate streaming
|
// Split the message into characters to simulate streaming
|
||||||
const messageChars = messageContent.split("");
|
const messageChars = messageContent.split("");
|
||||||
|
|
||||||
@@ -388,8 +397,15 @@ export default Index;
|
|||||||
res.write(createStreamChunk(batch));
|
res.write(createStreamChunk(batch));
|
||||||
index += batchSize;
|
index += batchSize;
|
||||||
} else {
|
} else {
|
||||||
// Send the final chunk
|
// Send the final chunk with optional usage info for high token simulation
|
||||||
res.write(createStreamChunk("", "assistant", true));
|
const usage = highTokensValue
|
||||||
|
? {
|
||||||
|
prompt_tokens: highTokensValue - 100,
|
||||||
|
completion_tokens: 100,
|
||||||
|
total_tokens: highTokensValue,
|
||||||
|
}
|
||||||
|
: undefined;
|
||||||
|
res.write(createStreamChunk("", "assistant", true, usage));
|
||||||
clearInterval(interval);
|
clearInterval(interval);
|
||||||
res.end();
|
res.end();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,8 +29,13 @@ export function createStreamChunk(
|
|||||||
content: string,
|
content: string,
|
||||||
role: string = "assistant",
|
role: string = "assistant",
|
||||||
isLast: boolean = false,
|
isLast: boolean = false,
|
||||||
|
usage?: {
|
||||||
|
prompt_tokens: number;
|
||||||
|
completion_tokens: number;
|
||||||
|
total_tokens: number;
|
||||||
|
},
|
||||||
) {
|
) {
|
||||||
const chunk = {
|
const chunk: any = {
|
||||||
id: `chatcmpl-${Date.now()}`,
|
id: `chatcmpl-${Date.now()}`,
|
||||||
object: "chat.completion.chunk",
|
object: "chat.completion.chunk",
|
||||||
created: Math.floor(Date.now() / 1000),
|
created: Math.floor(Date.now() / 1000),
|
||||||
@@ -44,6 +49,11 @@ export function createStreamChunk(
|
|||||||
],
|
],
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Add usage info to the final chunk if provided
|
||||||
|
if (isLast && usage) {
|
||||||
|
chunk.usage = usage;
|
||||||
|
}
|
||||||
|
|
||||||
return `data: ${JSON.stringify(chunk)}\n\n${isLast ? "data: [DONE]\n\n" : ""}`;
|
return `data: ${JSON.stringify(chunk)}\n\n${isLast ? "data: [DONE]\n\n" : ""}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user