Fix max output tokens due to weird discrepancy with Vertex AI (#123)

Author: Will Chen
Date: 2025-05-09 14:16:15 -07:00
Committed by: GitHub
Parent: b2eb05a1bc
Commit: 26305ee090


@@ -53,7 +53,8 @@ export const MODEL_OPTIONS: Record<RegularModelProvider, ModelOption[]> = {
     displayName: "Gemini 2.5 Pro",
     description: "Experimental version of Google's Gemini 2.5 Pro model",
     tag: "Recommended",
-    maxOutputTokens: 65_536,
+    // See Flash 2.5 comment below (go 1 below just to be safe, even though it seems OK now).
+    maxOutputTokens: 65_536 - 1,
     // Gemini context window = input token + output token
     contextWindow: 1_048_576,
   },
@@ -62,7 +63,8 @@ export const MODEL_OPTIONS: Record<RegularModelProvider, ModelOption[]> = {
name: "gemini-2.5-flash-preview-04-17", name: "gemini-2.5-flash-preview-04-17",
displayName: "Gemini 2.5 Flash", displayName: "Gemini 2.5 Flash",
description: "Preview version of Google's Gemini 2.5 Flash model", description: "Preview version of Google's Gemini 2.5 Flash model",
maxOutputTokens: 65_536, // Weirdly for Vertex AI, the output token limit is *exclusive* of the stated limit.
maxOutputTokens: 65_536 - 1,
// Gemini context window = input token + output token // Gemini context window = input token + output token
contextWindow: 1_048_576, contextWindow: 1_048_576,
}, },
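
For context, a minimal sketch (not part of this commit) of the off-by-one behavior the change works around: if Vertex AI treats the documented 65,536-token limit as exclusive, a request at exactly the limit can be rejected, so the config stays one token under. The clampOutputTokens helper and the trimmed ModelOption shape below are illustrative assumptions, not the repo's actual API.

// Illustrative sketch, assuming an exclusive output-token limit on Vertex AI.
interface ModelOption {
  name: string;
  maxOutputTokens: number;
  contextWindow: number;
}

// Hypothetical helper: clamp a requested output budget strictly below the
// stated limit, since requesting the full limit can be rejected.
function clampOutputTokens(requested: number, statedLimit: number): number {
  return Math.min(requested, statedLimit - 1);
}

const flash: ModelOption = {
  name: "gemini-2.5-flash-preview-04-17",
  maxOutputTokens: 65_536 - 1, // one under the documented limit, per the commit
  contextWindow: 1_048_576,
};

// e.g. clampOutputTokens(65_536, 65_536) === 65_535
console.log(clampOutputTokens(65_536, 65_536), flash.maxOutputTokens);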