From 26305ee090d06fe3c3e0a1747c7df5f5f9a5d568 Mon Sep 17 00:00:00 2001 From: Will Chen Date: Fri, 9 May 2025 14:16:15 -0700 Subject: [PATCH] Fix max output tokens due to weird discrepancy with Vertex AI (#123) --- src/constants/models.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/constants/models.ts b/src/constants/models.ts index 5385d54..bd94d88 100644 --- a/src/constants/models.ts +++ b/src/constants/models.ts @@ -53,7 +53,8 @@ export const MODEL_OPTIONS: Record = { displayName: "Gemini 2.5 Pro", description: "Experimental version of Google's Gemini 2.5 Pro model", tag: "Recommended", - maxOutputTokens: 65_536, + // See Flash 2.5 comment below (go 1 below just to be safe, even though it seems OK now). + maxOutputTokens: 65_536 - 1, // Gemini context window = input token + output token contextWindow: 1_048_576, }, @@ -62,7 +63,8 @@ name: "gemini-2.5-flash-preview-04-17", displayName: "Gemini 2.5 Flash", description: "Preview version of Google's Gemini 2.5 Flash model", - maxOutputTokens: 65_536, + // Weirdly, for Vertex AI the stated output token limit is *exclusive*, so the real maximum is one less. + maxOutputTokens: 65_536 - 1, // Gemini context window = input token + output token contextWindow: 1_048_576, },