From e65b80bcfabdbfb11c9120d7e63686a03279202b Mon Sep 17 00:00:00 2001
From: Will Chen
Date: Mon, 28 Apr 2025 13:43:34 -0700
Subject: [PATCH] Set explicit max output tokens to avoid truncated responses
 (#31)

---
 src/constants/models.ts                  | 13 +++++++++++++
 src/ipc/handlers/chat_stream_handlers.ts |  5 ++---
 src/ipc/utils/get_model_client.ts        | 19 ++++++++++++++++++-
 3 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/src/constants/models.ts b/src/constants/models.ts
index c85d9a1..8fcd199 100644
--- a/src/constants/models.ts
+++ b/src/constants/models.ts
@@ -4,47 +4,60 @@ export interface ModelOption {
   displayName: string;
   description: string;
   tag?: string;
+  maxOutputTokens?: number;
 }
 
 type RegularModelProvider = Exclude<ModelProvider, "auto">;
 export const MODEL_OPTIONS: Record<ModelProvider, ModelOption[]> = {
   openai: [
+    // https://platform.openai.com/docs/models/gpt-4.1
     {
       name: "gpt-4.1",
       displayName: "GPT 4.1",
       description: "OpenAI's flagship model",
+      maxOutputTokens: 32_768,
     },
+    // https://platform.openai.com/docs/models/gpt-4.1-mini
     {
       name: "gpt-4.1-mini",
       displayName: "GPT 4.1 Mini",
       description: "OpenAI's lightweight, but intelligent model",
+      maxOutputTokens: 32_768,
     },
+    // https://platform.openai.com/docs/models/o3-mini
     {
       name: "o3-mini",
       displayName: "o3 mini",
       description: "Reasoning model",
+      maxOutputTokens: 100_000,
     },
   ],
+  // https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-comparison-table
   anthropic: [
     {
       name: "claude-3-7-sonnet-latest",
       displayName: "Claude 3.7 Sonnet",
       description: "Excellent coder",
+      maxOutputTokens: 64_000,
     },
   ],
   google: [
+    // https://ai.google.dev/gemini-api/docs/models#gemini-2.5-pro-preview-03-25
     {
       name: "gemini-2.5-pro-exp-03-25",
       displayName: "Gemini 2.5 Pro",
       description: "Experimental version of Google's Gemini 2.5 Pro model",
       tag: "Recommended",
+      maxOutputTokens: 65_536,
     },
   ],
   openrouter: [
+    // https://openrouter.ai/deepseek/deepseek-chat-v3-0324:free
     {
       name: "deepseek/deepseek-chat-v3-0324:free",
       displayName: "DeepSeek v3 (free)",
       description: "Use for free (data may be used for training)",
+      maxOutputTokens: 32_000,
     },
   ],
   auto: [
diff --git a/src/ipc/handlers/chat_stream_handlers.ts b/src/ipc/handlers/chat_stream_handlers.ts
index 4835abc..75c389e 100644
--- a/src/ipc/handlers/chat_stream_handlers.ts
+++ b/src/ipc/handlers/chat_stream_handlers.ts
@@ -15,7 +15,7 @@ import { extractCodebase } from "../../utils/codebase";
 import { processFullResponseActions } from "../processors/response_processor";
 import { streamTestResponse } from "./testing_chat_handlers";
 import { getTestResponse } from "./testing_chat_handlers";
-import { getModelClient } from "../utils/get_model_client";
+import { getMaxTokens, getModelClient } from "../utils/get_model_client";
 import log from "electron-log";
 import {
   getSupabaseContext,
@@ -165,9 +165,8 @@ export function registerChatStreamHandlers() {
       } else {
         systemPrompt += "\n\n" + SUPABASE_NOT_AVAILABLE_SYSTEM_PROMPT;
       }
-
       const { textStream } = streamText({
-        maxTokens: 8_000,
+        maxTokens: getMaxTokens(settings.selectedModel),
         temperature: 0,
         model: modelClient,
         system: systemPrompt,
diff --git a/src/ipc/utils/get_model_client.ts b/src/ipc/utils/get_model_client.ts
index 39b92e1..a62edf8 100644
--- a/src/ipc/utils/get_model_client.ts
+++ b/src/ipc/utils/get_model_client.ts
@@ -9,6 +9,7 @@ import {
   PROVIDER_TO_ENV_VAR,
   AUTO_MODELS,
   PROVIDERS,
+  MODEL_OPTIONS,
 } from "../../constants/models";
 import { getEnvVar } from "./read_env";
 import log from "electron-log";
@@ -27,7 +28,7 @@ export function getModelClient(
         getEnvVar(PROVIDER_TO_ENV_VAR[autoModel.provider]);
       if (apiKey) {
-        console.log(
+        logger.log(
           `Using provider: ${autoModel.provider} model: ${autoModel.name}`
         );
         // Use the first model that has an API key
@@ -89,3 +90,19 @@ export function getModelClient(
     }
   }
 }
+
+// Most models support at least 8,000 output tokens, so we use that as the default.
+const DEFAULT_MAX_TOKENS = 8_000;
+
+export function getMaxTokens(model: LargeLanguageModel) {
+  if (!MODEL_OPTIONS[model.provider as keyof typeof MODEL_OPTIONS]) {
+    logger.warn(
+      `Model provider ${model.provider} not found in MODEL_OPTIONS. Using default max tokens.`
+    );
+    return DEFAULT_MAX_TOKENS;
+  }
+  const modelOption = MODEL_OPTIONS[
+    model.provider as keyof typeof MODEL_OPTIONS
+  ].find((m) => m.name === model.name);
+  return modelOption?.maxOutputTokens || DEFAULT_MAX_TOKENS;
+}
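
Reviewer note (not part of the patch): a minimal usage sketch of the new
getMaxTokens helper. The { provider, name } argument shape is assumed from how
the function reads model.provider and model.name; the illustrative model names
below are hypothetical, and the expected values follow from the MODEL_OPTIONS
table added above.

    import { getMaxTokens } from "./src/ipc/utils/get_model_client";

    // Provider and model both listed in MODEL_OPTIONS: returns the per-model cap.
    getMaxTokens({ provider: "anthropic", name: "claude-3-7-sonnet-latest" }); // 64_000

    // Known provider, model name not in the table: falls back to DEFAULT_MAX_TOKENS.
    getMaxTokens({ provider: "openai", name: "some-new-model" }); // 8_000

    // Provider missing from MODEL_OPTIONS entirely: warns, then falls back.
    getMaxTokens({ provider: "some-new-provider", name: "whatever" }); // 8_000

One design detail worth noting: the final fallback uses || rather than ??, so a
model entry with maxOutputTokens: 0 would also resolve to the default. That is
harmless here because no entry sets 0, but ?? would express the intent more
precisely.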