More free models (#1244)

<!-- This is an auto-generated description by cubic. -->

## Summary by cubic
Adds support for free OpenRouter models and a new “Free (OpenRouter)”
auto option that fails over across free models for reliability. Improves
setup flow and UI with provider cards, a “Free” price badge, and an
OpenRouter setup prompt in chat.

- **New Features**
  - Added OpenRouter free models: Qwen3 Coder (free), DeepSeek v3 (free), and DeepSeek v3.1 (free), marked with dollarSigns=0 and a “Free” badge.
  - New auto model: “Free (OpenRouter)” that uses a fallback client to cycle through free models, with smart retry on transient errors (see the sketch after this list).
  - New SetupProviderCard component and an updated SetupBanner with dedicated Google and OpenRouter setup cards.
  - Chat shows an OpenRouter setup prompt when “Free (OpenRouter)” is selected and OpenRouter isn’t configured.
  - New PriceBadge component in ModelPicker to display “Free” or the price tier.
  - E2E: added a setup flow test and an option to show the setup screen in tests.
  - Model updates: added DeepSeek v3.1, updated Kimi K2 to kimi-k2-0905, and migrated providers to LanguageModelV2.
<!-- End of auto-generated description by cubic. -->
Will Chen
2025-09-10 14:20:17 -07:00
committed by GitHub
parent 7150082f5a
commit 72acb31d59
11 changed files with 573 additions and 72 deletions

View File

@@ -166,6 +166,34 @@ export const MODEL_OPTIONS: Record<string, ModelOption[]> = {
},
],
openrouter: [
{
name: "qwen/qwen3-coder:free",
displayName: "Qwen3 Coder (free)",
description: "Use for free (data may be used for training)",
maxOutputTokens: 32_000,
contextWindow: 262_000,
temperature: 0,
dollarSigns: 0,
},
// https://openrouter.ai/deepseek/deepseek-chat-v3-0324:free
{
name: "deepseek/deepseek-chat-v3.1:free",
displayName: "DeepSeek v3.1 (free)",
description: "Use for free (data may be used for training)",
maxOutputTokens: 32_000,
contextWindow: 128_000,
temperature: 0,
dollarSigns: 0,
},
{
name: "deepseek/deepseek-chat-v3-0324:free",
displayName: "DeepSeek v3 (free)",
description: "Use for free (data may be used for training)",
maxOutputTokens: 32_000,
contextWindow: 128_000,
temperature: 0,
dollarSigns: 0,
},
{
name: "qwen/qwen3-coder",
displayName: "Qwen3 Coder",
@@ -175,11 +203,10 @@ export const MODEL_OPTIONS: Record<string, ModelOption[]> = {
temperature: 0,
dollarSigns: 2,
},
// https://openrouter.ai/deepseek/deepseek-chat-v3-0324:free
{
name: "deepseek/deepseek-chat-v3-0324:free",
displayName: "DeepSeek v3 (free)",
description: "Use for free (data may be used for training)",
name: "deepseek/deepseek-chat-v3.1",
displayName: "DeepSeek v3.1",
description: "Strong cost-effective model with optional thinking",
maxOutputTokens: 32_000,
contextWindow: 128_000,
temperature: 0,
@@ -187,20 +214,11 @@ export const MODEL_OPTIONS: Record<string, ModelOption[]> = {
},
// https://openrouter.ai/moonshotai/kimi-k2
{
name: "moonshotai/kimi-k2",
name: "moonshotai/kimi-k2-0905",
displayName: "Kimi K2",
description: "Powerful cost-effective model",
description: "Powerful cost-effective model (updated to 0905)",
maxOutputTokens: 32_000,
contextWindow: 131_000,
temperature: 0,
dollarSigns: 2,
},
{
name: "deepseek/deepseek-r1-0528",
displayName: "DeepSeek R1",
description: "Good reasoning model with excellent price for performance",
maxOutputTokens: 32_000,
contextWindow: 128_000,
contextWindow: 256_000,
temperature: 0,
dollarSigns: 2,
},
@@ -218,6 +236,18 @@ export const MODEL_OPTIONS: Record<string, ModelOption[]> = {
contextWindow: 1_000_000,
temperature: 0,
},
{
name: "free",
displayName: "Free (OpenRouter)",
description: "Selects from one of the free OpenRouter models",
tag: "Free",
// These are below Gemini 2.5 Pro & Flash limits
// which are the ones defaulted to for both regular auto
// and smart auto.
maxOutputTokens: 32_000,
contextWindow: 128_000,
temperature: 0,
},
],
azure: [
{
@@ -311,6 +341,10 @@ export const MODEL_OPTIONS: Record<string, ModelOption[]> = {
],
};
export const FREE_OPENROUTER_MODEL_NAMES = MODEL_OPTIONS.openrouter
.filter((model) => model.name.endsWith(":free"))
.map((model) => model.name);
export const PROVIDER_TO_ENV_VAR: Record<string, string> = {
openai: "OPENAI_API_KEY",
anthropic: "ANTHROPIC_API_KEY",
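For reference (not part of the diff), given the `:free` entries added above, the derived constant would resolve to the three free model IDs. A tiny illustrative check, with the import path assumed:

```ts
import { FREE_OPENROUTER_MODEL_NAMES } from "../shared/language_model_helpers"; // path assumed

// Expected (illustrative):
// ["qwen/qwen3-coder:free", "deepseek/deepseek-chat-v3.1:free", "deepseek/deepseek-chat-v3-0324:free"]
console.log(FREE_OPENROUTER_MODEL_NAMES);
```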

View File

@@ -0,0 +1,367 @@
import {
LanguageModelV2,
LanguageModelV2CallOptions,
LanguageModelV2StreamPart,
} from "@ai-sdk/provider";
// Types
interface FallbackSettings {
models: Array<LanguageModelV2>;
}
interface RetryState {
attemptNumber: number;
modelsAttempted: Set<number>;
initialModelIndex: number;
errors: Array<{ modelId: string; error: Error }>;
}
interface StreamResult {
stream: ReadableStream<LanguageModelV2StreamPart>;
request?: { body?: unknown };
response?: { headers?: Record<string, string> };
}
// Error classification
const RETRYABLE_STATUS_CODES = new Set([
401, // Unauthorized - wrong API key
403, // Forbidden - permission error
408, // Request Timeout
409, // Conflict
413, // Payload Too Large
429, // Too Many Requests
500, // Internal Server Error
502, // Bad Gateway
503, // Service Unavailable
504, // Gateway Timeout
]);
const RETRYABLE_ERROR_PATTERNS = [
"overloaded",
"service unavailable",
"bad gateway",
"too many requests",
"internal server error",
"gateway timeout",
"rate_limit",
"wrong-key",
"unexpected",
"capacity",
"timeout",
"server_error",
"econnrefused",
"enotfound",
"econnreset",
"epipe",
"etimedout",
];
export function defaultShouldRetryThisError(error: any): boolean {
if (!error) return false;
try {
// Check status code
const statusCode =
error?.statusCode || error?.status || error?.response?.status;
if (
statusCode &&
(RETRYABLE_STATUS_CODES.has(statusCode) || statusCode >= 500)
) {
return true;
}
// Check error message patterns
const errorString = (
error?.message ||
error?.code ||
error?.type ||
JSON.stringify(error)
).toLowerCase();
return RETRYABLE_ERROR_PATTERNS.some((pattern) =>
errorString.includes(pattern),
);
} catch {
// If we can't parse the error, don't retry
return false;
}
}
export function createFallback(settings: FallbackSettings): FallbackModel {
return new FallbackModel(settings);
}
export class FallbackModel implements LanguageModelV2 {
readonly specificationVersion = "v2";
private readonly settings: FallbackSettings;
private currentModelIndex: number = 0;
private lastModelReset: number = Date.now();
private readonly modelResetInterval: number;
private readonly retryAfterOutput: boolean;
private readonly maxRetries: number;
private isRetrying: boolean = false;
constructor(settings: FallbackSettings) {
// Validate settings
if (!settings.models || settings.models.length === 0) {
throw new Error("At least one model must be provided in settings.models");
}
this.settings = settings;
this.modelResetInterval = 3 * 60 * 1000; // Default: 3 minutes
this.retryAfterOutput = true;
this.maxRetries = settings.models.length * 2; // Default: try each model twice
}
get modelId(): string {
return this.getCurrentModel().modelId;
}
get provider(): string {
return this.getCurrentModel().provider;
}
get supportedUrls():
| Record<string, RegExp[]>
| PromiseLike<Record<string, RegExp[]>> {
return this.getCurrentModel().supportedUrls;
}
private getCurrentModel(): LanguageModelV2 {
const model = this.settings.models[this.currentModelIndex];
if (!model) {
throw new Error(`Model at index ${this.currentModelIndex} not found`);
}
return model;
}
private checkAndResetModel(): void {
// Only reset if we're not currently in a retry cycle
if (this.isRetrying) return;
const now = Date.now();
if (
this.currentModelIndex !== 0 &&
now - this.lastModelReset >= this.modelResetInterval
) {
this.currentModelIndex = 0;
this.lastModelReset = now;
}
}
private switchToNextModel(): void {
this.currentModelIndex =
(this.currentModelIndex + 1) % this.settings.models.length;
}
private async retry<T>(
operation: (state: RetryState) => Promise<T>,
operationName: string,
): Promise<T> {
const state: RetryState = {
attemptNumber: 0,
modelsAttempted: new Set([this.currentModelIndex]),
initialModelIndex: this.currentModelIndex,
errors: [],
};
this.isRetrying = true;
try {
while (state.attemptNumber < this.maxRetries) {
state.attemptNumber++;
try {
return await operation(state);
} catch (error) {
const err = error as Error;
state.errors.push({ modelId: this.modelId, error: err });
// Check if we should retry this error
if (!defaultShouldRetryThisError(err)) {
throw err;
}
// Call error handler if provided
// If we've tried all models at least once and still failing, throw
if (state.modelsAttempted.size === this.settings.models.length) {
// If we haven't hit max retries yet, we can try models again
if (state.attemptNumber >= this.maxRetries) {
throw new Error(
`All ${this.settings.models.length} models failed for ${operationName}. ` +
`Last error: ${err.message}`,
);
}
}
// Switch to next model
this.switchToNextModel();
state.modelsAttempted.add(this.currentModelIndex);
}
}
// Should never reach here, but just in case
throw new Error(
`Max retries (${this.maxRetries}) exceeded for ${operationName}`,
);
} finally {
this.isRetrying = false;
}
}
async doGenerate(): Promise<any> {
throw new Error("doGenerate is not supported for fallback model");
}
async doStream(options: LanguageModelV2CallOptions): Promise<StreamResult> {
this.checkAndResetModel();
return this.retry(async (retryState) => {
const result = await this.getCurrentModel().doStream(options);
// Create a wrapped stream that handles errors gracefully
const wrappedStream = this.createWrappedStream(
result.stream,
options,
retryState,
);
return {
...result,
stream: wrappedStream,
};
}, "stream");
}
private createWrappedStream(
originalStream: ReadableStream<LanguageModelV2StreamPart>,
options: LanguageModelV2CallOptions,
retryState: RetryState,
): ReadableStream<LanguageModelV2StreamPart> {
let hasStreamedContent = false;
// eslint-disable-next-line @typescript-eslint/no-this-alias
const fallbackModel = this;
return new ReadableStream<LanguageModelV2StreamPart>({
async start(controller) {
let reader: ReadableStreamDefaultReader<LanguageModelV2StreamPart> | null =
null;
const processStream = async (
stream: ReadableStream<LanguageModelV2StreamPart>,
): Promise<void> => {
reader = stream.getReader();
try {
while (true) {
const { done, value } = await reader.read();
if (done) {
controller.close();
return;
}
// Check for early errors before streaming content
if (!hasStreamedContent && value && "error" in value) {
const error = value.error as Error;
if (defaultShouldRetryThisError(error)) {
throw error;
}
}
controller.enqueue(value);
// Mark that we've streamed actual content (not just metadata)
if (value?.type && value.type !== "stream-start") {
hasStreamedContent = true;
}
}
} finally {
reader?.releaseLock();
}
};
try {
await processStream(originalStream);
} catch (error) {
const err = error as Error;
// Decide whether to retry
const shouldRetry =
(!hasStreamedContent || fallbackModel.retryAfterOutput) &&
defaultShouldRetryThisError(err) &&
retryState.attemptNumber < fallbackModel.maxRetries;
if (shouldRetry) {
// Track this error
retryState.errors.push({
modelId: fallbackModel.modelId,
error: err,
});
retryState.attemptNumber++;
// Switch to next model
fallbackModel.switchToNextModel();
retryState.modelsAttempted.add(fallbackModel.currentModelIndex);
// Check if we've tried all models
if (
retryState.modelsAttempted.size ===
fallbackModel.settings.models.length &&
retryState.attemptNumber >= fallbackModel.maxRetries
) {
controller.error(
new Error(
`All models failed during streaming. Last error: ${err.message}`,
),
);
return;
}
try {
// Create a new stream with the next model
const nextResult = await fallbackModel
.getCurrentModel()
.doStream(options);
await processStream(nextResult.stream);
} catch (nextError) {
// If the retry also fails, propagate the error
controller.error(nextError);
}
} else {
// Don't retry - propagate the error
controller.error(err);
}
}
},
cancel() {
// Handle stream cancellation if needed
},
});
}
}
// Export utility functions
export { defaultShouldRetryThisError as isRetryableError };
// Type guards for better error handling
export function isNetworkError(error: any): boolean {
const networkErrorCodes = [
"ECONNREFUSED",
"ENOTFOUND",
"ECONNRESET",
"EPIPE",
"ETIMEDOUT",
];
return error?.code && networkErrorCodes.includes(error.code);
}
export function isRateLimitError(error: any): boolean {
const statusCode = error?.statusCode || error?.status;
return (
statusCode === 429 ||
(error?.message && error.message.toLowerCase().includes("rate"))
);
}
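
A hedged usage sketch of the helpers exported above, not part of the commit. It assumes the AI SDK v5 `streamText` accepts a `LanguageModelV2` directly (as the PR's migration suggests), that the `@openrouter/ai-sdk-provider` provider is callable with a model ID, and illustrative import paths:

```ts
import { streamText } from "ai";
import { createOpenRouter } from "@openrouter/ai-sdk-provider";
import { createFallback, isRetryableError } from "./fallback_ai_model"; // path assumed
import { FREE_OPENROUTER_MODEL_NAMES } from "../shared/language_model_helpers"; // path assumed

const openRouter = createOpenRouter({ apiKey: process.env.OPENROUTER_API_KEY });

// Build one LanguageModelV2 per free model ID and wrap them in the fallback client.
const model = createFallback({
  models: FREE_OPENROUTER_MODEL_NAMES.map((name) => openRouter(name)),
});

async function demo() {
  // streamText drives doStream; a retryable failure (429, 5xx, connection resets)
  // makes the fallback switch to the next model and retry the stream.
  const result = streamText({ model, prompt: "Say hello in one short sentence." });
  try {
    for await (const text of result.textStream) process.stdout.write(text);
  } catch (err) {
    // Errors that survive every fallback attempt can still be classified for logging.
    console.error("Unrecoverable error. Retryable:", isRetryableError(err));
  }
}

demo();
```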

View File

@@ -4,6 +4,7 @@ import { createAnthropic } from "@ai-sdk/anthropic";
import { createXai } from "@ai-sdk/xai";
import { createVertex as createGoogleVertex } from "@ai-sdk/google-vertex";
import { azure } from "@ai-sdk/azure";
import { LanguageModelV2 } from "@ai-sdk/provider";
import { createOpenRouter } from "@openrouter/ai-sdk-provider";
import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
import { createAmazonBedrock } from "@ai-sdk/amazon-bedrock";
@@ -14,14 +15,17 @@ import type {
} from "../../lib/schemas";
import { getEnvVar } from "./read_env";
import log from "electron-log";
import { getLanguageModelProviders } from "../shared/language_model_helpers";
import {
FREE_OPENROUTER_MODEL_NAMES,
getLanguageModelProviders,
} from "../shared/language_model_helpers";
import { LanguageModelProvider } from "../ipc_types";
import { createDyadEngine } from "./llm_engine_provider";
import { LM_STUDIO_BASE_URL } from "./lm_studio_utils";
import { LanguageModel } from "ai";
import { createOllamaProvider } from "./ollama_provider";
import { getOllamaApiUrl } from "../handlers/local_model_ollama_handler";
import { createFallback } from "./fallback_ai_model";
const dyadEngineUrl = process.env.DYAD_ENGINE_URL;
const dyadGatewayUrl = process.env.DYAD_GATEWAY_URL;
@@ -31,6 +35,10 @@ const AUTO_MODELS = [
provider: "google",
name: "gemini-2.5-flash",
},
{
provider: "openrouter",
name: "qwen/qwen3-coder:free",
},
{
provider: "anthropic",
name: "claude-sonnet-4-20250514",
@@ -42,7 +50,7 @@ const AUTO_MODELS = [
];
export interface ModelClient {
model: LanguageModel;
model: LanguageModelV2;
builtinProviderId?: string;
}
@@ -142,6 +150,30 @@ export async function getModelClient(
}
// Handle 'auto' provider by trying each model in AUTO_MODELS until one works
if (model.provider === "auto") {
if (model.name === "free") {
const openRouterProvider = allProviders.find(
(p) => p.id === "openrouter",
);
if (!openRouterProvider) {
throw new Error("OpenRouter provider not found");
}
return {
modelClient: {
model: createFallback({
models: FREE_OPENROUTER_MODEL_NAMES.map(
(name: string) =>
getRegularModelClient(
{ provider: "openrouter", name },
settings,
openRouterProvider,
).modelClient.model,
),
}),
builtinProviderId: "openrouter",
},
isEngineEnabled: false,
};
}
for (const autoModel of AUTO_MODELS) {
const providerInfo = allProviders.find(
(p) => p.id === autoModel.provider,

View File

@@ -1,4 +1,3 @@
import { LanguageModel } from "ai";
import { OpenAICompatibleChatLanguageModel } from "@ai-sdk/openai-compatible";
import {
FetchFunction,
@@ -9,6 +8,7 @@ import {
import log from "electron-log";
import { getExtraProviderOptions } from "./thinking_utils";
import type { UserSettings } from "../../lib/schemas";
import { LanguageModelV2 } from "@ai-sdk/provider";
const logger = log.scope("llm_engine_provider");
@@ -53,7 +53,10 @@ export interface DyadEngineProvider {
/**
Creates a model for text generation.
*/
(modelId: ExampleChatModelId, settings?: ExampleChatSettings): LanguageModel;
(
modelId: ExampleChatModelId,
settings?: ExampleChatSettings,
): LanguageModelV2;
/**
Creates a chat model for text generation.
@@ -61,7 +64,7 @@ Creates a chat model for text generation.
chatModel(
modelId: ExampleChatModelId,
settings?: ExampleChatSettings,
): LanguageModel;
): LanguageModelV2;
}
export function createDyadEngine(

View File

@@ -1,8 +1,7 @@
import { LanguageModel } from "ai";
import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
import type { FetchFunction } from "@ai-sdk/provider-utils";
import { withoutTrailingSlash } from "@ai-sdk/provider-utils";
import type {} from "@ai-sdk/provider";
import type { LanguageModelV2 } from "@ai-sdk/provider";
type OllamaChatModelId = string;
@@ -20,7 +19,7 @@ export interface OllamaProviderOptions {
export interface OllamaChatSettings {}
export interface OllamaProvider {
(modelId: OllamaChatModelId, settings?: OllamaChatSettings): LanguageModel;
(modelId: OllamaChatModelId, settings?: OllamaChatSettings): LanguageModelV2;
}
export function createOllamaProvider(