diff --git a/drizzle/0017_sharp_corsair.sql b/drizzle/0017_sharp_corsair.sql new file mode 100644 index 0000000..19cea52 --- /dev/null +++ b/drizzle/0017_sharp_corsair.sql @@ -0,0 +1 @@ +ALTER TABLE `messages` ADD `max_tokens_used` integer; \ No newline at end of file diff --git a/drizzle/meta/0017_snapshot.json b/drizzle/meta/0017_snapshot.json new file mode 100644 index 0000000..7bcfc10 --- /dev/null +++ b/drizzle/meta/0017_snapshot.json @@ -0,0 +1,767 @@ +{ + "version": "6", + "dialect": "sqlite", + "id": "071199d7-dfb5-4681-85b7-228f1de3123a", + "prevId": "c0a49147-ac92-4046-afe8-42f20df9314b", + "tables": { + "apps": { + "name": "apps", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "path": { + "name": "path", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + }, + "github_org": { + "name": "github_org", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "github_repo": { + "name": "github_repo", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "github_branch": { + "name": "github_branch", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "supabase_project_id": { + "name": "supabase_project_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "supabase_parent_project_id": { + "name": "supabase_parent_project_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "neon_project_id": { + "name": "neon_project_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "neon_development_branch_id": { + "name": "neon_development_branch_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "neon_preview_branch_id": { + "name": "neon_preview_branch_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "vercel_project_id": { + "name": "vercel_project_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "vercel_project_name": { + "name": "vercel_project_name", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "vercel_team_id": { + "name": "vercel_team_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "vercel_deployment_url": { + "name": "vercel_deployment_url", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "install_command": { + "name": "install_command", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "start_command": { + "name": "start_command", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "chat_context": { + "name": "chat_context", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + 
"is_favorite": { + "name": "is_favorite", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "0" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "chats": { + "name": "chats", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "app_id": { + "name": "app_id", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "initial_commit_hash": { + "name": "initial_commit_hash", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + } + }, + "indexes": {}, + "foreignKeys": { + "chats_app_id_apps_id_fk": { + "name": "chats_app_id_apps_id_fk", + "tableFrom": "chats", + "tableTo": "apps", + "columnsFrom": [ + "app_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "language_model_providers": { + "name": "language_model_providers", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "api_base_url": { + "name": "api_base_url", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "env_var_name": { + "name": "env_var_name", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "language_models": { + "name": "language_models", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "display_name": { + "name": "display_name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "api_name": { + "name": "api_name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "builtin_provider_id": { + "name": "builtin_provider_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "custom_provider_id": { + "name": "custom_provider_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "max_output_tokens": { + "name": "max_output_tokens", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "context_window": { + "name": "context_window", + "type": "integer", + "primaryKey": false, 
+ "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + } + }, + "indexes": {}, + "foreignKeys": { + "language_models_custom_provider_id_language_model_providers_id_fk": { + "name": "language_models_custom_provider_id_language_model_providers_id_fk", + "tableFrom": "language_models", + "tableTo": "language_model_providers", + "columnsFrom": [ + "custom_provider_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "mcp_servers": { + "name": "mcp_servers", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "transport": { + "name": "transport", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "command": { + "name": "command", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "args": { + "name": "args", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "env_json": { + "name": "env_json", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "enabled": { + "name": "enabled", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "0" + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "mcp_tool_consents": { + "name": "mcp_tool_consents", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "server_id": { + "name": "server_id", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "tool_name": { + "name": "tool_name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "consent": { + "name": "consent", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'ask'" + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + } + }, + "indexes": { + "uniq_mcp_consent": { + "name": "uniq_mcp_consent", + "columns": [ + "server_id", + "tool_name" + ], + "isUnique": true + } + }, + "foreignKeys": { + "mcp_tool_consents_server_id_mcp_servers_id_fk": { + "name": "mcp_tool_consents_server_id_mcp_servers_id_fk", + "tableFrom": "mcp_tool_consents", + "tableTo": "mcp_servers", + "columnsFrom": [ + "server_id" + ], + 
"columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "messages": { + "name": "messages", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "chat_id": { + "name": "chat_id", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "role": { + "name": "role", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "approval_state": { + "name": "approval_state", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "source_commit_hash": { + "name": "source_commit_hash", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "commit_hash": { + "name": "commit_hash", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "request_id": { + "name": "request_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "max_tokens_used": { + "name": "max_tokens_used", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + } + }, + "indexes": {}, + "foreignKeys": { + "messages_chat_id_chats_id_fk": { + "name": "messages_chat_id_chats_id_fk", + "tableFrom": "messages", + "tableTo": "chats", + "columnsFrom": [ + "chat_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "prompts": { + "name": "prompts", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "versions": { + "name": "versions", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "app_id": { + "name": "app_id", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "commit_hash": { + "name": "commit_hash", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "neon_db_timestamp": { + "name": "neon_db_timestamp", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, 
+ "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(unixepoch())" + } + }, + "indexes": { + "versions_app_commit_unique": { + "name": "versions_app_commit_unique", + "columns": [ + "app_id", + "commit_hash" + ], + "isUnique": true + } + }, + "foreignKeys": { + "versions_app_id_apps_id_fk": { + "name": "versions_app_id_apps_id_fk", + "tableFrom": "versions", + "tableTo": "apps", + "columnsFrom": [ + "app_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + } + }, + "views": {}, + "enums": {}, + "_meta": { + "schemas": {}, + "tables": {}, + "columns": {} + }, + "internal": { + "indexes": {} + } +} diff --git a/drizzle/meta/_journal.json b/drizzle/meta/_journal.json index 6ba6f69..5baeffe 100644 --- a/drizzle/meta/_journal.json +++ b/drizzle/meta/_journal.json @@ -120,6 +120,13 @@ "when": 1762297039106, "tag": "0016_petite_thanos", "breakpoints": true + }, + { + "idx": 17, + "version": "6", + "when": 1764804624402, + "tag": "0017_sharp_corsair", + "breakpoints": true } ] } \ No newline at end of file diff --git a/e2e-tests/context_limit_banner.spec.ts b/e2e-tests/context_limit_banner.spec.ts new file mode 100644 index 0000000..32d0d7d --- /dev/null +++ b/e2e-tests/context_limit_banner.spec.ts @@ -0,0 +1,46 @@ +import { test, Timeout } from "./helpers/test_helper"; +import { expect } from "@playwright/test"; + +test("context limit banner appears and summarize works", async ({ po }) => { + await po.setUp(); + + // Send a message that triggers high token usage (110k tokens) + // With a default context window of 128k, this leaves only 18k tokens remaining + // which is below the 40k threshold to show the banner + await po.sendPrompt("tc=context-limit-response [high-tokens=110000]"); + + // Verify the context limit banner appears + const contextLimitBanner = po.page.getByTestId("context-limit-banner"); + await expect(contextLimitBanner).toBeVisible({ timeout: Timeout.MEDIUM }); + + // Verify banner text + await expect(contextLimitBanner).toContainText( + "You're close to the context limit for this chat.", + ); + + // Click the summarize button + await contextLimitBanner + .getByRole("button", { name: "Summarize into new chat" }) + .click(); + + // Wait for the new chat to load and message to complete + await po.waitForChatCompletion(); + + // Snapshot the messages in the new chat + await po.snapshotMessages(); +}); + +test("context limit banner does not appear when within limit", async ({ + po, +}) => { + await po.setUp(); + + // Send a message with low token usage (50k tokens) + // With a 128k context window, this leaves 78k tokens remaining + // which is above the 40k threshold - banner should NOT appear + await po.sendPrompt("tc=context-limit-response [high-tokens=50000]"); + + // Verify the context limit banner does NOT appear + const contextLimitBanner = po.page.getByTestId("context-limit-banner"); + await expect(contextLimitBanner).not.toBeVisible(); +}); diff --git a/e2e-tests/fixtures/context-limit-response.md b/e2e-tests/fixtures/context-limit-response.md new file mode 100644 index 0000000..4eb7451 --- /dev/null +++ b/e2e-tests/fixtures/context-limit-response.md @@ -0,0 +1,4 @@ +Here is a simple response to 
test the context limit banner functionality. + +This message simulates being close to the model's context window limit. + diff --git a/e2e-tests/snapshots/context_limit_banner.spec.ts_context-limit-banner-appears-and-summarize-works-1.aria.yml b/e2e-tests/snapshots/context_limit_banner.spec.ts_context-limit-banner-appears-and-summarize-works-1.aria.yml new file mode 100644 index 0000000..24096f1 --- /dev/null +++ b/e2e-tests/snapshots/context_limit_banner.spec.ts_context-limit-banner-appears-and-summarize-works-1.aria.yml @@ -0,0 +1,14 @@ +- paragraph: Summarize from chat-id=1 +- img +- text: file1.txt +- button "Edit": + - img +- img +- text: file1.txt +- paragraph: More EOM +- button: + - img +- img +- text: less than a minute ago +- button "Retry": + - img \ No newline at end of file diff --git a/src/components/ChatPanel.tsx b/src/components/ChatPanel.tsx index 6f24cc2..ee6e858 100644 --- a/src/components/ChatPanel.tsx +++ b/src/components/ChatPanel.tsx @@ -97,7 +97,11 @@ export function ChatPanel({ const streamCount = chatId ? (streamCountById.get(chatId) ?? 0) : 0; console.log("streamCount - scrolling to bottom", streamCount); scrollToBottom(); - }, [chatId, chatId ? (streamCountById.get(chatId) ?? 0) : 0]); + }, [ + chatId, + chatId ? (streamCountById.get(chatId) ?? 0) : 0, + chatId ? (isStreamingById.get(chatId) ?? false) : false, + ]); useEffect(() => { const container = messagesContainerRef.current; diff --git a/src/components/ModelPicker.tsx b/src/components/ModelPicker.tsx index 0354aa1..b8b2149 100644 --- a/src/components/ModelPicker.tsx +++ b/src/components/ModelPicker.tsx @@ -27,11 +27,17 @@ import { useSettings } from "@/hooks/useSettings"; import { PriceBadge } from "@/components/PriceBadge"; import { TURBO_MODELS } from "@/ipc/shared/language_model_constants"; import { cn } from "@/lib/utils"; +import { useQueryClient } from "@tanstack/react-query"; +import { TOKEN_COUNT_QUERY_KEY } from "@/hooks/useCountTokens"; export function ModelPicker() { const { settings, updateSettings } = useSettings(); + const queryClient = useQueryClient(); const onModelSelect = (model: LargeLanguageModel) => { updateSettings({ selectedModel: model }); + // Invalidate token count when model changes since different models have different context windows + // (technically they have different tokenizers, but we don't keep track of that). 
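+    // Invalidating the ["tokenCount"] key prefix refetches every
+    // ["tokenCount", chatId, input] query, so the token bar and the
+    // context-limit banner recompute against the new model's context window.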
+    queryClient.invalidateQueries({ queryKey: TOKEN_COUNT_QUERY_KEY });
   };

   const [open, setOpen] = useState(false);
diff --git a/src/components/chat/ChatInput.tsx b/src/components/chat/ChatInput.tsx
index 2b43302..e7f30c5 100644
--- a/src/components/chat/ChatInput.tsx
+++ b/src/components/chat/ChatInput.tsx
@@ -52,13 +52,14 @@ import {
   TooltipProvider,
   TooltipTrigger,
 } from "../ui/tooltip";
-import { useNavigate } from "@tanstack/react-router";
+
 import { useVersions } from "@/hooks/useVersions";
 import { useAttachments } from "@/hooks/useAttachments";
 import { AttachmentsList } from "./AttachmentsList";
 import { DragDropOverlay } from "./DragDropOverlay";
 import { FileAttachmentDropdown } from "./FileAttachmentDropdown";
-import { showError, showExtraFilesToast } from "@/lib/toast";
+import { showExtraFilesToast } from "@/lib/toast";
+import { useSummarizeInNewChat } from "./SummarizeInNewChatButton";
 import { ChatInputControls } from "../ChatInputControls";
 import { ChatErrorBox } from "./ChatErrorBox";
 import {
@@ -419,30 +420,10 @@ function SuggestionButton({
 }

 function SummarizeInNewChatButton() {
-  const chatId = useAtomValue(selectedChatIdAtom);
-  const appId = useAtomValue(selectedAppIdAtom);
-  const { streamMessage } = useStreamChat();
-  const navigate = useNavigate();
-  const onClick = async () => {
-    if (!appId) {
-      console.error("No app id found");
-      return;
-    }
-    try {
-      const newChatId = await IpcClient.getInstance().createChat(appId);
-      // navigate to new chat
-      await navigate({ to: "/chat", search: { id: newChatId } });
-      await streamMessage({
-        prompt: "Summarize from chat-id=" + chatId,
-        chatId: newChatId,
-      });
-    } catch (err) {
-      showError(err);
-    }
-  };
+  const { handleSummarize } = useSummarizeInNewChat();
   return (
     <Button onClick={handleSummarize}>
       Summarize to new chat
diff --git a/src/components/chat/ChatMessage.tsx b/src/components/chat/ChatMessage.tsx
index 86c70d1..99acc59 100644
--- a/src/components/chat/ChatMessage.tsx
+++ b/src/components/chat/ChatMessage.tsx
@@ -12,6 +12,7 @@ import {
   GitCommit,
   Copy,
   Check,
+  Info,
 } from "lucide-react";
 import { formatDistanceToNow, format } from "date-fns";
 import { useVersions } from "@/hooks/useVersions";
@@ -277,6 +278,20 @@ const ChatMessage = ({ message, isLastMessage }: ChatMessageProps) => {
               )}
+              {isLastMessage && message.totalTokens && (
+                <TooltipProvider>
+                  <Tooltip>
+                    <TooltipTrigger asChild>
+                      <span>
+                        <Info size={16} />
+                      </span>
+                    </TooltipTrigger>
+                    <TooltipContent>
+                      Max tokens used: {message.totalTokens.toLocaleString()}
+                    </TooltipContent>
+                  </Tooltip>
+                </TooltipProvider>
+              )}
             )}
diff --git a/src/components/chat/ContextLimitBanner.tsx b/src/components/chat/ContextLimitBanner.tsx
new file mode 100644
index 0000000..8c9cd23
--- /dev/null
+++ b/src/components/chat/ContextLimitBanner.tsx
@@ -0,0 +1,89 @@
+import { AlertTriangle, ArrowRight } from "lucide-react";
+import { Button } from "@/components/ui/button";
+import {
+  Tooltip,
+  TooltipContent,
+  TooltipTrigger,
+} from "@/components/ui/tooltip";
+import { useSummarizeInNewChat } from "./SummarizeInNewChatButton";
+
+const CONTEXT_LIMIT_THRESHOLD = 40_000;
+
+interface ContextLimitBannerProps {
+  totalTokens?: number | null;
+  contextWindow?: number;
+}
+
+function formatTokenCount(count: number): string {
+  if (count >= 1000) {
+    return `${(count / 1000).toFixed(1)}k`.replace(".0k", "k");
+  }
+  return count.toString();
+}
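+// For example (given the logic above): formatTokenCount(128_000) returns "128k",
+// formatTokenCount(18_500) returns "18.5k", and formatTokenCount(950) returns "950".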
+
+export function ContextLimitBanner({
+  totalTokens,
+  contextWindow,
+}: ContextLimitBannerProps) {
+  const { handleSummarize } = useSummarizeInNewChat();
+
+  // Don't show banner if we don't have the necessary data
+  if (!totalTokens || !contextWindow) {
+    return null;
+  }
+
+  // Check if we're within 40k tokens of the context limit
+  const tokensRemaining = contextWindow - totalTokens;
+  if (tokensRemaining > CONTEXT_LIMIT_THRESHOLD) {
+    return null;
+  }
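+
+  // Worked example, using the numbers from the e2e test: with a 128_000-token
+  // context window and totalTokens = 110_000, tokensRemaining is 18_000, which
+  // is below the 40_000 threshold, so the banner renders.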
+  return (
+    <div data-testid="context-limit-banner">
+      <div>
+        <Tooltip>
+          <TooltipTrigger asChild>
+            <AlertTriangle size={16} />
+          </TooltipTrigger>
+          <TooltipContent>
+            <div>
+              <span>Used:</span>
+              <span>{formatTokenCount(totalTokens)}</span>
+            </div>
+            <div>
+              <span>Limit:</span>
+              <span>{formatTokenCount(contextWindow)}</span>
+            </div>
+          </TooltipContent>
+        </Tooltip>
+      </div>
+      <div>
+        You're close to the context limit for this chat.
+      </div>
+      <Button onClick={handleSummarize}>
+        Summarize into new chat
+        <ArrowRight size={14} />
+      </Button>
+    </div>
+  );
+}
diff --git a/src/components/chat/MessagesList.tsx b/src/components/chat/MessagesList.tsx
index 61ca90f..92a52d6 100644
--- a/src/components/chat/MessagesList.tsx
+++ b/src/components/chat/MessagesList.tsx
@@ -18,6 +18,8 @@ import { useLanguageModelProviders } from "@/hooks/useLanguageModelProviders";
 import { useSettings } from "@/hooks/useSettings";
 import { useUserBudgetInfo } from "@/hooks/useUserBudgetInfo";
 import { PromoMessage } from "./PromoMessage";
+import { ContextLimitBanner } from "./ContextLimitBanner";
+import { useCountTokens } from "@/hooks/useCountTokens";

 interface MessagesListProps {
   messages: Message[];
@@ -36,6 +38,11 @@ export const MessagesList = forwardRef(
     const [isRetryLoading, setIsRetryLoading] = useState(false);
     const selectedChatId = useAtomValue(selectedChatIdAtom);
     const { userBudget } = useUserBudgetInfo();
+    // Only fetch token count when not streaming
+    const { result: tokenCountResult } = useCountTokens(
+      !isStreaming ? selectedChatId : null,
+      "",
+    );

     const renderSetupBanner = () => {
       const selectedModel = settings?.selectedModel;
@@ -73,6 +80,13 @@ export const MessagesList = forwardRef(
           )}
+          {/* Show context limit banner when close to token limit */}
+          {!isStreaming && tokenCountResult && (
+            <ContextLimitBanner
+              totalTokens={tokenCountResult.actualMaxTokens}
+              contextWindow={tokenCountResult.contextWindow}
+            />
+          )}
         {!isStreaming && (
{!!messages.length && diff --git a/src/components/chat/SummarizeInNewChatButton.tsx b/src/components/chat/SummarizeInNewChatButton.tsx new file mode 100644 index 0000000..345070a --- /dev/null +++ b/src/components/chat/SummarizeInNewChatButton.tsx @@ -0,0 +1,38 @@ +import { useNavigate } from "@tanstack/react-router"; +import { useAtomValue } from "jotai"; +import { selectedChatIdAtom } from "@/atoms/chatAtoms"; +import { selectedAppIdAtom } from "@/atoms/appAtoms"; +import { useStreamChat } from "@/hooks/useStreamChat"; +import { IpcClient } from "@/ipc/ipc_client"; +import { showError } from "@/lib/toast"; + +export function useSummarizeInNewChat() { + const chatId = useAtomValue(selectedChatIdAtom); + const appId = useAtomValue(selectedAppIdAtom); + const { streamMessage } = useStreamChat(); + const navigate = useNavigate(); + + const handleSummarize = async () => { + if (!appId) { + console.error("No app id found"); + return; + } + if (!chatId) { + console.error("No chat id found"); + return; + } + try { + const newChatId = await IpcClient.getInstance().createChat(appId); + // navigate to new chat + await navigate({ to: "/chat", search: { id: newChatId } }); + await streamMessage({ + prompt: "Summarize from chat-id=" + chatId, + chatId: newChatId, + }); + } catch (err) { + showError(err); + } + }; + + return { handleSummarize }; +} diff --git a/src/components/chat/TokenBar.tsx b/src/components/chat/TokenBar.tsx index be69b1a..58f5919 100644 --- a/src/components/chat/TokenBar.tsx +++ b/src/components/chat/TokenBar.tsx @@ -1,4 +1,4 @@ -import React, { useEffect, useState } from "react"; +import React from "react"; import { Tooltip, TooltipContent, @@ -24,31 +24,15 @@ interface TokenBarProps { export function TokenBar({ chatId }: TokenBarProps) { const [inputValue] = useAtom(chatInputValueAtom); - const { countTokens, result } = useCountTokens(); - const [error, setError] = useState(null); const { settings } = useSettings(); - useEffect(() => { - if (!chatId) return; - // Mark this as used, we need to re-trigger token count - // when selected model changes. - void settings?.selectedModel; - - const debounceTimer = setTimeout(() => { - countTokens(chatId, inputValue).catch((err) => { - setError("Failed to count tokens"); - console.error("Token counting error:", err); - }); - }, 500); - - return () => clearTimeout(debounceTimer); - }, [chatId, inputValue, countTokens, settings?.selectedModel]); + const { result, error } = useCountTokens(chatId ?? null, inputValue); if (!chatId || !result) { return null; } const { - totalTokens, + estimatedTotalTokens: totalTokens, messageHistoryTokens, codebaseTokens, mentionedAppsTokens, @@ -142,7 +126,9 @@ export function TokenBar({ chatId }: TokenBarProps) { - {error &&
<div>{error}</div>}
+      {error && (
+        <div>Failed to count tokens</div>
+      )}
       {(!settings?.enableProSmartFilesContextMode ||
         !settings?.enableDyadPro) && (
diff --git a/src/db/schema.ts b/src/db/schema.ts index dfc1985..0b97f77 100644 --- a/src/db/schema.ts +++ b/src/db/schema.ts @@ -77,6 +77,8 @@ export const messages = sqliteTable("messages", { // The commit hash of the codebase at the time the message was sent commitHash: text("commit_hash"), requestId: text("request_id"), + // Max tokens used for this message (only for assistant messages) + maxTokensUsed: integer("max_tokens_used"), createdAt: integer("created_at", { mode: "timestamp" }) .notNull() .default(sql`(unixepoch())`), diff --git a/src/hooks/useCountTokens.ts b/src/hooks/useCountTokens.ts index 9964fd0..6318382 100644 --- a/src/hooks/useCountTokens.ts +++ b/src/hooks/useCountTokens.ts @@ -1,43 +1,62 @@ -import { useCallback } from "react"; -import { atom, useAtom } from "jotai"; +import { + keepPreviousData, + useQuery, + useQueryClient, +} from "@tanstack/react-query"; import { IpcClient } from "@/ipc/ipc_client"; import type { TokenCountResult } from "@/ipc/ipc_types"; +import { useCallback, useEffect, useState } from "react"; -// Create atoms to store the token count state -export const tokenCountResultAtom = atom(null); -export const tokenCountLoadingAtom = atom(false); -export const tokenCountErrorAtom = atom(null); +export const TOKEN_COUNT_QUERY_KEY = ["tokenCount"] as const; -export function useCountTokens() { - const [result, setResult] = useAtom(tokenCountResultAtom); - const [loading, setLoading] = useAtom(tokenCountLoadingAtom); - const [error, setError] = useAtom(tokenCountErrorAtom); +export function useCountTokens(chatId: number | null, input: string = "") { + const queryClient = useQueryClient(); - const countTokens = useCallback( - async (chatId: number, input: string) => { - setLoading(true); - setError(null); + // Debounce input so we don't call the token counting IPC on every keystroke. + const [debouncedInput, setDebouncedInput] = useState(input); - try { - const ipcClient = IpcClient.getInstance(); - const tokenResult = await ipcClient.countTokens({ chatId, input }); - setResult(tokenResult); - return tokenResult; - } catch (error) { - console.error("Error counting tokens:", error); - setError(error instanceof Error ? 
error : new Error(String(error))); - throw error; - } finally { - setLoading(false); - } + useEffect(() => { + // If there's no chat, don't bother debouncing + if (chatId === null) { + setDebouncedInput(input); + return; + } + + const handle = setTimeout(() => { + setDebouncedInput(input); + }, 1_000); + + return () => clearTimeout(handle); + }, [chatId, input]); + + const { + data: result = null, + isLoading: loading, + error, + refetch, + } = useQuery({ + queryKey: [...TOKEN_COUNT_QUERY_KEY, chatId, debouncedInput], + queryFn: async () => { + if (chatId === null) return null; + return IpcClient.getInstance().countTokens({ + chatId, + input: debouncedInput, + }); }, - [setLoading, setError, setResult], - ); + placeholderData: keepPreviousData, + enabled: chatId !== null, + }); + + // For imperative invalidation (e.g., after streaming completes) + const invalidateTokenCount = useCallback(() => { + queryClient.invalidateQueries({ queryKey: TOKEN_COUNT_QUERY_KEY }); + }, [queryClient]); return { - countTokens, result, loading, error, + refetch, + invalidateTokenCount, }; } diff --git a/src/hooks/useStreamChat.ts b/src/hooks/useStreamChat.ts index 2dcc131..430dbaa 100644 --- a/src/hooks/useStreamChat.ts +++ b/src/hooks/useStreamChat.ts @@ -49,7 +49,6 @@ export function useStreamChat({ const setStreamCountById = useSetAtom(chatStreamCountByIdAtom); const { refreshVersions } = useVersions(selectedAppId); const { refreshAppIframe } = useRunApp(); - const { countTokens } = useCountTokens(); const { refetchUserBudget } = useUserBudgetInfo(); const { checkProblems } = useCheckProblems(selectedAppId); const { settings } = useSettings(); @@ -62,6 +61,7 @@ export function useStreamChat({ chatId = id; } let { refreshProposal } = hasChatId ? useProposal(chatId) : useProposal(); + const { invalidateTokenCount } = useCountTokens(chatId ?? null, ""); const streamMessage = useCallback( async ({ @@ -154,7 +154,7 @@ export function useStreamChat({ refreshChats(); refreshApp(); refreshVersions(); - countTokens(chatId, ""); + invalidateTokenCount(); onSettled?.(); }, onError: (errorMessage: string) => { @@ -174,7 +174,7 @@ export function useStreamChat({ refreshChats(); refreshApp(); refreshVersions(); - countTokens(chatId, ""); + invalidateTokenCount(); onSettled?.(); }, }); diff --git a/src/ipc/handlers/chat_stream_handlers.ts b/src/ipc/handlers/chat_stream_handlers.ts index eeaf6d6..8134f17 100644 --- a/src/ipc/handlers/chat_stream_handlers.ts +++ b/src/ipc/handlers/chat_stream_handlers.ts @@ -448,6 +448,7 @@ ${componentSnippet} }); let fullResponse = ""; + let maxTokensUsed: number | undefined; // Check if this is a test prompt const testResponse = getTestResponse(req.prompt); @@ -885,7 +886,7 @@ This conversation includes one or more image attachments. When the user uploads } satisfies GoogleGenerativeAIProviderOptions; } - return streamText({ + const streamResult = streamText({ headers: isAnthropic ? { "anthropic-beta": "context-1m-2025-08-07", @@ -900,6 +901,33 @@ This conversation includes one or more image attachments. When the user uploads system: systemPromptOverride, tools, messages: chatMessages.filter((m) => m.content), + onFinish: (response) => { + const totalTokens = response.usage?.totalTokens; + + if (typeof totalTokens === "number") { + // We use the highest total tokens used (we are *not* accumulating) + // since we're trying to figure it out if we're near the context limit. + maxTokensUsed = Math.max(maxTokensUsed ?? 
0, totalTokens); + + // Persist the aggregated token usage on the placeholder assistant message + void db + .update(messages) + .set({ maxTokensUsed: maxTokensUsed }) + .where(eq(messages.id, placeholderAssistantMessage.id)) + .catch((error) => { + logger.error( + "Failed to save total tokens for assistant message", + error, + ); + }); + + logger.log( + `Total tokens used (aggregated for message ${placeholderAssistantMessage.id}): ${maxTokensUsed}`, + ); + } else { + logger.log("Total tokens used: unknown"); + } + }, onError: (error: any) => { let errorMessage = (error as any)?.error?.message; const responseBody = error?.error?.responseBody; @@ -923,6 +951,10 @@ This conversation includes one or more image attachments. When the user uploads }, abortSignal: abortController.signal, }); + return { + fullStream: streamResult.fullStream, + usage: streamResult.usage, + }; }; let lastDbSaveAt = 0; diff --git a/src/ipc/handlers/token_count_handlers.ts b/src/ipc/handlers/token_count_handlers.ts index 6934941..240bc50 100644 --- a/src/ipc/handlers/token_count_handlers.ts +++ b/src/ipc/handlers/token_count_handlers.ts @@ -141,8 +141,15 @@ export function registerTokenCountHandlers() { codebaseTokens + mentionedAppsTokens; + // Find the last assistant message since totalTokens is only set on assistant messages + const lastAssistantMessage = [...chat.messages] + .reverse() + .find((m) => m.role === "assistant"); + const actualMaxTokens = lastAssistantMessage?.maxTokensUsed ?? null; + return { - totalTokens, + estimatedTotalTokens: totalTokens, + actualMaxTokens, messageHistoryTokens, codebaseTokens, mentionedAppsTokens, diff --git a/src/ipc/ipc_types.ts b/src/ipc/ipc_types.ts index 16f2f33..917e226 100644 --- a/src/ipc/ipc_types.ts +++ b/src/ipc/ipc_types.ts @@ -49,6 +49,8 @@ export interface ChatResponseEnd { updatedFiles: boolean; extraFiles?: string[]; extraFilesError?: string; + totalTokens?: number; + contextWindow?: number; } export interface ChatProblemsEvent { @@ -81,6 +83,7 @@ export interface Message { dbTimestamp?: string | null; createdAt?: Date | string; requestId?: string | null; + totalTokens?: number | null; } export interface Chat { @@ -167,7 +170,8 @@ export interface TokenCountParams { } export interface TokenCountResult { - totalTokens: number; + estimatedTotalTokens: number; + actualMaxTokens: number | null; messageHistoryTokens: number; codebaseTokens: number; mentionedAppsTokens: number; diff --git a/src/ipc/shared/language_model_constants.ts b/src/ipc/shared/language_model_constants.ts index 7cb7387..f36987d 100644 --- a/src/ipc/shared/language_model_constants.ts +++ b/src/ipc/shared/language_model_constants.ts @@ -306,11 +306,9 @@ export const MODEL_OPTIONS: Record = { displayName: "Auto", description: "Automatically selects the best model", tag: "Default", - // These are below Gemini 2.5 Pro & Flash limits - // which are the ones defaulted to for both regular auto - // and smart auto. + // The following is reasonable defaults. maxOutputTokens: 32_000, - contextWindow: 1_000_000, + contextWindow: 200_000, temperature: 0, }, { diff --git a/src/prompts/summarize_chat_system_prompt.ts b/src/prompts/summarize_chat_system_prompt.ts index e399ebc..1b60a47 100644 --- a/src/prompts/summarize_chat_system_prompt.ts +++ b/src/prompts/summarize_chat_system_prompt.ts @@ -1,8 +1,42 @@ export const SUMMARIZE_CHAT_SYSTEM_PROMPT = ` -You are a helpful assistant that understands long conversations and can summarize them in a few bullet points. 
+You are a helpful assistant that summarizes AI coding chat sessions with a focus on technical changes and file modifications.

-I want you to write down the gist of the conversation in a few bullet points, focusing on the major changes, particularly
-at the end of the conversation.
+Your task is to analyze the conversation and provide:

-Use <dyad-chat-summary> for setting the chat summary (put this at the end). The chat summary should be less than a sentence, but more than a few words. YOU SHOULD ALWAYS INCLUDE EXACTLY ONE CHAT TITLE
+1. **Chat Summary**: A concise summary (less than a sentence, more than a few words) that captures the primary objective or outcome of the session.
+
+2. **Major Changes**: Identify and highlight:
+   - Major code modifications, refactors, or new features implemented
+   - Critical bug fixes or debugging sessions
+   - Architecture or design pattern changes
+   - Important decisions made during the conversation
+
+3. **Relevant Files**: List the most important files discussed or modified, with brief context:
+   - Files that received significant changes
+   - New files created
+   - Files central to the discussion or problem-solving
+   - Format: \`path/to/file.ext - brief description of changes\`
+
+4. **Focus on Recency**: Prioritize changes and discussions from the latter part of the conversation, as these typically represent the final state or most recent decisions.
+
+**Output Format:**
+
+## Major Changes
+- Bullet point of significant change 1
+- Bullet point of significant change 2
+
+## Important Context
+- Any critical decisions, trade-offs, or next steps discussed
+
+## Relevant Files
+- \`file1.ts\` - Description of changes
+- \`file2.py\` - Description of changes
+
+<dyad-chat-summary>
+[Your concise summary here - less than a sentence, more than a few words]
+</dyad-chat-summary>
+
+**Reminder:**
+
+YOU MUST ALWAYS INCLUDE EXACTLY ONE <dyad-chat-summary> TAG AT THE END.
 `;
diff --git a/testing/fake-llm-server/chatCompletionHandler.ts b/testing/fake-llm-server/chatCompletionHandler.ts
index 32b2f4a..3ea25c8 100644
--- a/testing/fake-llm-server/chatCompletionHandler.ts
+++ b/testing/fake-llm-server/chatCompletionHandler.ts
@@ -371,6 +371,15 @@
     return;
   }

+  // Check for high token usage marker to simulate near context limit
+  const highTokensMatch =
+    typeof lastMessage?.content === "string" &&
+    !lastMessage?.content.startsWith("Summarize the following chat:") &&
+    lastMessage?.content?.match?.(/\[high-tokens=(\d+)\]/);
+  const highTokensValue = highTokensMatch
+    ? parseInt(highTokensMatch[1], 10)
+    : null;
+
   // Split the message into characters to simulate streaming
   const messageChars = messageContent.split("");

@@ -388,8 +397,15 @@
       res.write(createStreamChunk(batch));
       index += batchSize;
     } else {
-      // Send the final chunk
-      res.write(createStreamChunk("", "assistant", true));
+      // Send the final chunk with optional usage info for high token simulation
+      const usage = highTokensValue
+        ? {
+            prompt_tokens: highTokensValue - 100,
+            completion_tokens: 100,
+            total_tokens: highTokensValue,
+          }
+        : undefined;
+      res.write(createStreamChunk("", "assistant", true, usage));
       clearInterval(interval);
       res.end();
     }
diff --git a/testing/fake-llm-server/index.ts b/testing/fake-llm-server/index.ts
index c5fa243..1b4c30a 100644
--- a/testing/fake-llm-server/index.ts
+++ b/testing/fake-llm-server/index.ts
@@ -29,8 +29,13 @@ export function createStreamChunk(
   content: string,
   role: string = "assistant",
   isLast: boolean = false,
+  usage?: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  },
 ) {
-  const chunk = {
+  const chunk: any = {
     id: `chatcmpl-${Date.now()}`,
     object: "chat.completion.chunk",
     created: Math.floor(Date.now() / 1000),
@@ -44,6 +49,11 @@
     ],
   };

+  // Add usage info to the final chunk if provided
+  if (isLast && usage) {
+    chunk.usage = usage;
+  }
+
   return `data: ${JSON.stringify(chunk)}\n\n${isLast ? "data: [DONE]\n\n" : ""}`;
 }
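+
+// Example final SSE frames for `[high-tokens=110000]` (a sketch; the elided
+// `choices` payload keeps the existing chunk format):
+//   data: {"id":"chatcmpl-...","object":"chat.completion.chunk","created":...,
+//          "choices":[...],"usage":{"prompt_tokens":109900,"completion_tokens":100,"total_tokens":110000}}
+//   data: [DONE]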