Set explicit max output tokens to avoid truncated responses (#31)

commit e65b80bcfa
parent 9b94122672
Author: Will Chen
Date: 2025-04-28 13:43:34 -07:00
Committed by: GitHub
3 changed files with 33 additions and 4 deletions


@@ -4,47 +4,60 @@ export interface ModelOption {
  displayName: string;
  description: string;
  tag?: string;
+  maxOutputTokens?: number;
}
type RegularModelProvider = Exclude<ModelProvider, "ollama">;
export const MODEL_OPTIONS: Record<RegularModelProvider, ModelOption[]> = {
  openai: [
+    // https://platform.openai.com/docs/models/gpt-4.1
    {
      name: "gpt-4.1",
      displayName: "GPT 4.1",
      description: "OpenAI's flagship model",
+      maxOutputTokens: 32_768,
    },
+    // https://platform.openai.com/docs/models/gpt-4.1-mini
    {
      name: "gpt-4.1-mini",
      displayName: "GPT 4.1 Mini",
      description: "OpenAI's lightweight, but intelligent model",
+      maxOutputTokens: 32_768,
    },
+    // https://platform.openai.com/docs/models/o3-mini
    {
      name: "o3-mini",
      displayName: "o3 mini",
      description: "Reasoning model",
+      maxOutputTokens: 100_000,
    },
  ],
+  // https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-comparison-table
  anthropic: [
    {
      name: "claude-3-7-sonnet-latest",
      displayName: "Claude 3.7 Sonnet",
      description: "Excellent coder",
+      maxOutputTokens: 64_000,
    },
  ],
  google: [
+    // https://ai.google.dev/gemini-api/docs/models#gemini-2.5-pro-preview-03-25
    {
      name: "gemini-2.5-pro-exp-03-25",
      displayName: "Gemini 2.5 Pro",
      description: "Experimental version of Google's Gemini 2.5 Pro model",
      tag: "Recommended",
+      maxOutputTokens: 65_536,
    },
  ],
  openrouter: [
+    // https://openrouter.ai/deepseek/deepseek-chat-v3-0324:free
    {
      name: "deepseek/deepseek-chat-v3-0324:free",
      displayName: "DeepSeek v3 (free)",
      description: "Use for free (data may be used for training)",
+      maxOutputTokens: 32_000,
    },
  ],
  auto: [

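For reference, each cap above comes from the provider documentation linked beside it, and reading one back is a plain lookup on MODEL_OPTIONS. A minimal sketch (the import path is an assumption, not part of this commit):

import { MODEL_OPTIONS } from "./constants/models"; // path is an assumption

const sonnet = MODEL_OPTIONS.anthropic.find(
  (m) => m.name === "claude-3-7-sonnet-latest",
);
console.log(sonnet?.maxOutputTokens); // 64000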

@@ -15,7 +15,7 @@ import { extractCodebase } from "../../utils/codebase";
import { processFullResponseActions } from "../processors/response_processor";
import { streamTestResponse } from "./testing_chat_handlers";
import { getTestResponse } from "./testing_chat_handlers";
-import { getModelClient } from "../utils/get_model_client";
+import { getMaxTokens, getModelClient } from "../utils/get_model_client";
import log from "electron-log";
import {
getSupabaseContext,
@@ -165,9 +165,8 @@ export function registerChatStreamHandlers() {
      } else {
        systemPrompt += "\n\n" + SUPABASE_NOT_AVAILABLE_SYSTEM_PROMPT;
      }
      const { textStream } = streamText({
-        maxTokens: 8_000,
+        maxTokens: getMaxTokens(settings.selectedModel),
        temperature: 0,
        model: modelClient,
        system: systemPrompt,

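The practical effect: the stream's output budget now tracks the selected model instead of a blanket 8_000, so high-limit models (o3-mini allows up to 100_000 output tokens) are no longer cut off mid-response. A minimal sketch of the resulting call shape, assuming the Vercel AI SDK's streamText; settings, modelClient, and systemPrompt come from the surrounding handler, and userPrompt is a hypothetical stand-in:

import { streamText } from "ai"; // assuming the Vercel AI SDK
import { getMaxTokens } from "../utils/get_model_client";

const { textStream } = streamText({
  maxTokens: getMaxTokens(settings.selectedModel), // e.g. 100_000 for o3-mini
  temperature: 0, // deterministic output for code edits
  model: modelClient, // assumed: built by getModelClient(...)
  system: systemPrompt, // assumed: assembled earlier in the handler
  prompt: userPrompt, // hypothetical stand-in for the chat message
});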

@@ -9,6 +9,7 @@ import {
  PROVIDER_TO_ENV_VAR,
  AUTO_MODELS,
  PROVIDERS,
+  MODEL_OPTIONS,
} from "../../constants/models";
import { getEnvVar } from "./read_env";
import log from "electron-log";
@@ -27,7 +28,7 @@ export function getModelClient(
        getEnvVar(PROVIDER_TO_ENV_VAR[autoModel.provider]);
      if (apiKey) {
-        console.log(
+        logger.log(
          `Using provider: ${autoModel.provider} model: ${autoModel.name}`
        );
        // Use the first model that has an API key
@@ -89,3 +90,19 @@ export function getModelClient(
    }
  }
}
+
+// Most models support at least 8000 output tokens so we use it as a default value.
+const DEFAULT_MAX_TOKENS = 8_000;
+
+export function getMaxTokens(model: LargeLanguageModel) {
+  if (!MODEL_OPTIONS[model.provider as keyof typeof MODEL_OPTIONS]) {
+    logger.warn(
+      `Model provider ${model.provider} not found in MODEL_OPTIONS. Using default max tokens.`
+    );
+    return DEFAULT_MAX_TOKENS;
+  }
+  const modelOption = MODEL_OPTIONS[
+    model.provider as keyof typeof MODEL_OPTIONS
+  ].find((m) => m.name === model.name);
+  return modelOption?.maxOutputTokens || DEFAULT_MAX_TOKENS;
+}
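A quick sanity check of getMaxTokens' fallback behavior (illustrative calls only; the argument shape is inferred from the code above):

// Known provider and model: returns the configured cap.
getMaxTokens({ provider: "anthropic", name: "claude-3-7-sonnet-latest" }); // 64_000

// Provider absent from MODEL_OPTIONS (e.g. "ollama"): logs a warning and
// falls back to DEFAULT_MAX_TOKENS.
getMaxTokens({ provider: "ollama", name: "llama3" }); // 8_000

// Known provider but unknown model name: find() misses, so the
// `|| DEFAULT_MAX_TOKENS` fallback likewise returns 8_000.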