Set explicit max output tokens to avoid truncated responses (#31)
@@ -4,47 +4,60 @@ export interface ModelOption {
   displayName: string;
   description: string;
   tag?: string;
+  maxOutputTokens?: number;
 }
 
 type RegularModelProvider = Exclude<ModelProvider, "ollama">;
 export const MODEL_OPTIONS: Record<RegularModelProvider, ModelOption[]> = {
   openai: [
+    // https://platform.openai.com/docs/models/gpt-4.1
     {
       name: "gpt-4.1",
       displayName: "GPT 4.1",
       description: "OpenAI's flagship model",
+      maxOutputTokens: 32_768,
     },
+    // https://platform.openai.com/docs/models/gpt-4.1-mini
     {
       name: "gpt-4.1-mini",
       displayName: "GPT 4.1 Mini",
       description: "OpenAI's lightweight, but intelligent model",
+      maxOutputTokens: 32_768,
     },
+    // https://platform.openai.com/docs/models/o3-mini
     {
       name: "o3-mini",
       displayName: "o3 mini",
       description: "Reasoning model",
+      maxOutputTokens: 100_000,
     },
   ],
+  // https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-comparison-table
   anthropic: [
     {
       name: "claude-3-7-sonnet-latest",
       displayName: "Claude 3.7 Sonnet",
       description: "Excellent coder",
+      maxOutputTokens: 64_000,
     },
   ],
   google: [
+    // https://ai.google.dev/gemini-api/docs/models#gemini-2.5-pro-preview-03-25
     {
       name: "gemini-2.5-pro-exp-03-25",
       displayName: "Gemini 2.5 Pro",
       description: "Experimental version of Google's Gemini 2.5 Pro model",
       tag: "Recommended",
+      maxOutputTokens: 65_536,
     },
   ],
   openrouter: [
+    // https://openrouter.ai/deepseek/deepseek-chat-v3-0324:free
     {
       name: "deepseek/deepseek-chat-v3-0324:free",
       displayName: "DeepSeek v3 (free)",
       description: "Use for free (data may be used for training)",
+      maxOutputTokens: 32_000,
     },
   ],
   auto: [
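For illustration, a minimal sketch of how one of these entries resolves at
runtime (the import path and the 8_000 fallback are assumptions here; the real
lookup is the getMaxTokens helper added later in this commit):

    import { MODEL_OPTIONS } from "./models"; // path assumed

    // Find a configured model and fall back to a conservative cap when the
    // entry defines no explicit maxOutputTokens.
    const option = MODEL_OPTIONS.openai.find((m) => m.name === "gpt-4.1");
    const cap = option?.maxOutputTokens ?? 8_000; // 32768 for gpt-4.1
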
@@ -15,7 +15,7 @@ import { extractCodebase } from "../../utils/codebase";
 import { processFullResponseActions } from "../processors/response_processor";
 import { streamTestResponse } from "./testing_chat_handlers";
 import { getTestResponse } from "./testing_chat_handlers";
-import { getModelClient } from "../utils/get_model_client";
+import { getMaxTokens, getModelClient } from "../utils/get_model_client";
 import log from "electron-log";
 import {
   getSupabaseContext,
@@ -165,9 +165,8 @@ export function registerChatStreamHandlers() {
       } else {
         systemPrompt += "\n\n" + SUPABASE_NOT_AVAILABLE_SYSTEM_PROMPT;
       }
-
       const { textStream } = streamText({
-        maxTokens: 8_000,
+        maxTokens: getMaxTokens(settings.selectedModel),
         temperature: 0,
         model: modelClient,
         system: systemPrompt,
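streamText here looks like the Vercel AI SDK API; under that assumption, a
self-contained sketch of the pattern (the model id and prompt are
placeholders):

    import { streamText } from "ai";
    import { openai } from "@ai-sdk/openai";

    // Cap output at the model's documented limit so long generations are not
    // cut off mid-response; 32_768 is what getMaxTokens would return for gpt-4.1.
    const { textStream } = streamText({
      model: openai("gpt-4.1"),
      maxTokens: 32_768,
      temperature: 0,
      system: "You are a helpful assistant.",
      prompt: "Summarize the design of this project.",
    });

    for await (const chunk of textStream) {
      process.stdout.write(chunk);
    }
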
@@ -9,6 +9,7 @@ import {
   PROVIDER_TO_ENV_VAR,
   AUTO_MODELS,
   PROVIDERS,
+  MODEL_OPTIONS,
 } from "../../constants/models";
 import { getEnvVar } from "./read_env";
 import log from "electron-log";
@@ -27,7 +28,7 @@ export function getModelClient(
       getEnvVar(PROVIDER_TO_ENV_VAR[autoModel.provider]);
 
       if (apiKey) {
-        console.log(
+        logger.log(
          `Using provider: ${autoModel.provider} model: ${autoModel.name}`
         );
         // Use the first model that has an API key
@@ -89,3 +90,19 @@ export function getModelClient(
     }
   }
 }
+
+// Most models support at least 8000 output tokens so we use it as a default value.
+const DEFAULT_MAX_TOKENS = 8_000;
+
+export function getMaxTokens(model: LargeLanguageModel) {
+  if (!MODEL_OPTIONS[model.provider as keyof typeof MODEL_OPTIONS]) {
+    logger.warn(
+      `Model provider ${model.provider} not found in MODEL_OPTIONS. Using default max tokens.`
+    );
+    return DEFAULT_MAX_TOKENS;
+  }
+  const modelOption = MODEL_OPTIONS[
+    model.provider as keyof typeof MODEL_OPTIONS
+  ].find((m) => m.name === model.name);
+  return modelOption?.maxOutputTokens || DEFAULT_MAX_TOKENS;
+}
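A usage sketch for the new helper; the LargeLanguageModel shape is not part of
this diff and is inferred from the settings.selectedModel call site:

    // Assumed shape of the argument:
    interface LargeLanguageModel {
      provider: string; // "openai" | "anthropic" | "google" | "openrouter" | ...
      name: string;     // e.g. "gpt-4.1"
    }

    getMaxTokens({ provider: "anthropic", name: "claude-3-7-sonnet-latest" }); // 64000
    getMaxTokens({ provider: "openai", name: "o3-mini" }); // 100000
    getMaxTokens({ provider: "ollama", name: "llama3" }); // 8000 (default, logs a warning)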