Replace thinking with native Gemini thinking summaries (#400)

This uses Gemini's native [thinking summaries](https://cloud.google.com/vertex-ai/generative-ai/docs/thinking#thought-summaries), which were recently added to the API. Why? The grafted thinking would sometimes cause weird issues where the model, especially Gemini 2.5 Flash, got confused and put dyad tags like `<dyad-write>` inside the `<think>` tags. This also improves the UX because you can see the native thoughts rather than having the Gemini response load for a while without any feedback. I also tried adding Anthropic extended thinking; however, it requires the temperature to be set to 1, which isn't ideal for Dyad's use case, where we need precise syntax following.
2025-06-16 17:29:32 -07:00
parent 3a6ab12bed
commit 30b5c0d0ef
39 changed files with 1020 additions and 628 deletions
--- a/src/ipc/handlers/chat_stream_handlers.ts
+++ b/src/ipc/handlers/chat_stream_handlers.ts
@@ -33,6 +33,9 @@ import { readFile, writeFile, unlink } from "fs/promises";
 import { getMaxTokens } from "../utils/token_utils";
 import { MAX_CHAT_TURNS_IN_CONTEXT } from "@/constants/settings_constants";
 import { validateChatContext } from "../utils/context_paths_utils";
+import { GoogleGenerativeAIProviderOptions } from "@ai-sdk/google";
+
+import { getExtraProviderOptions } from "../utils/thinking_utils";

 const logger = log.scope("chat_stream_handlers");

@@ -443,17 +446,31 @@ This conversation includes one or more image attachments. When the user uploads
        }

        // When calling streamText, the messages need to be properly formatted for mixed content
-        const { textStream } = streamText({
+        const { fullStream } = streamText({
          maxTokens: await getMaxTokens(settings.selectedModel),
          temperature: 0,
          maxRetries: 2,
          model: modelClient.model,
+          providerOptions: {
+            "dyad-gateway": getExtraProviderOptions(
+              modelClient.builtinProviderId,
+            ),
+            google: {
+              thinkingConfig: {
+                includeThoughts: true,
+              },
+            } satisfies GoogleGenerativeAIProviderOptions,
+          },
          system: systemPrompt,
          messages: chatMessages.filter((m) => m.content),
          onError: (error: any) => {
            logger.error("Error streaming text:", error);
-            const message =
-              (error as any)?.error?.message || JSON.stringify(error);
+            let errorMessage = (error as any)?.error?.message;
+            const responseBody = error?.error?.responseBody;
+            if (errorMessage && responseBody) {
+              errorMessage += "\n\nDetails: " + responseBody;
+            }
+            const message = errorMessage || JSON.stringify(error);
            event.sender.send(
              "chat:response:error",
              `Sorry, there was an error from the AI: ${message}`,
@@ -465,10 +482,38 @@ This conversation includes one or more image attachments. When the user uploads
        });

        // Process the stream as before
+        let inThinkingBlock = false;
        try {
-          for await (const textPart of textStream) {
-            fullResponse += textPart;
-            fullResponse = cleanThinkingByEscapingDyadTags(fullResponse);
+          for await (const part of fullStream) {
+            let chunk = "";
+            if (part.type === "text-delta") {
+              if (inThinkingBlock) {
+                chunk = "</think>";
+                inThinkingBlock = false;
+              }
+              chunk += part.textDelta;
+            } else if (part.type === "reasoning") {
+              if (!inThinkingBlock) {
+                chunk = "<think>";
+                inThinkingBlock = true;
+              }
+              // Escape dyad tags in reasoning content
+              // We are replacing the opening tag with a look-alike character
+              // to avoid issues where thinking content includes dyad tags
+              // and are mishandled by:
+              // 1. FE markdown parser
+              // 2. Main process response processor
+              chunk += part.textDelta
+                .replace(/<dyad/g, "＜dyad")
+                .replace(/<\/dyad/g, "＜/dyad");
+            }
+
+            if (!chunk) {
+              continue;
+            }
+
+            fullResponse += chunk;
+
            if (
              fullResponse.includes("$$SUPABASE_CLIENT_CODE$$") &&
              updatedChat.app?.supabaseProjectId
--- a/src/ipc/shared/language_model_helpers.ts
+++ b/src/ipc/shared/language_model_helpers.ts
@@ -6,6 +6,10 @@ import {
 import type { LanguageModelProvider, LanguageModel } from "@/ipc/ipc_types";
 import { eq } from "drizzle-orm";

+export const PROVIDERS_THAT_SUPPORT_THINKING: (keyof typeof MODEL_OPTIONS)[] = [
+  "google",
+];
+
 export interface ModelOption {
  name: string;
  displayName: string;
--- a/src/ipc/utils/get_model_client.ts
+++ b/src/ipc/utils/get_model_client.ts
@@ -116,6 +116,7 @@ export async function getModelClient(
        ? createDyadEngine({
            apiKey: dyadApiKey,
            baseURL: dyadEngineUrl ?? "https://engine.dyad.sh/v1",
+            originalProviderId: model.provider,
            dyadOptions: {
              enableLazyEdits: settings.enableProLazyEditsMode,
              enableSmartFilesContext: settings.enableProSmartFilesContextMode,
@@ -150,7 +151,7 @@ export async function getModelClient(
              }
            : undefined,
        ),
-        builtinProviderId: "auto",
+        builtinProviderId: model.provider,
      };

      return {
--- a/src/ipc/utils/llm_engine_provider.ts
+++ b/src/ipc/utils/llm_engine_provider.ts
@@ -11,6 +11,7 @@ import {

 import { OpenAICompatibleChatSettings } from "@ai-sdk/openai-compatible";
 import log from "electron-log";
+import { getExtraProviderOptions } from "./thinking_utils";

 const logger = log.scope("llm_engine_provider");

@@ -42,6 +43,7 @@ or to provide a custom fetch implementation for e.g. testing.
 */
  fetch?: FetchFunction;

+  originalProviderId: string;
  dyadOptions: {
    enableLazyEdits?: boolean;
    enableSmartFilesContext?: boolean;
@@ -113,42 +115,43 @@ export function createDyadEngine(
      defaultObjectGenerationMode:
        "tool" as LanguageModelV1ObjectGenerationMode,
      // Custom fetch implementation that adds files to the request
-      fetch: files?.length
-        ? (input: RequestInfo | URL, init?: RequestInit) => {
-            // Use default fetch if no init or body
-            if (!init || !init.body || typeof init.body !== "string") {
-              return (options.fetch || fetch)(input, init);
-            }
+      fetch: (input: RequestInfo | URL, init?: RequestInit) => {
+        // Use default fetch if no init or body
+        if (!init || !init.body || typeof init.body !== "string") {
+          return (options.fetch || fetch)(input, init);
+        }

-            try {
-              // Parse the request body to manipulate it
-              const parsedBody = JSON.parse(init.body);
+        try {
+          // Parse the request body to manipulate it
+          const parsedBody = {
+            ...JSON.parse(init.body),
+            ...getExtraProviderOptions(options.originalProviderId),
+          };

-              // Add files to the request if they exist
-              if (files?.length) {
-                parsedBody.dyad_options = {
-                  files,
-                  enable_lazy_edits: options.dyadOptions.enableLazyEdits,
-                  enable_smart_files_context:
-                    options.dyadOptions.enableSmartFilesContext,
-                };
-              }
-
-              // Return modified request with files included
-              const modifiedInit = {
-                ...init,
-                body: JSON.stringify(parsedBody),
-              };
-
-              // Use the provided fetch or default fetch
-              return (options.fetch || fetch)(input, modifiedInit);
-            } catch (e) {
-              logger.error("Error parsing request body", e);
-              // If parsing fails, use original request
-              return (options.fetch || fetch)(input, init);
-            }
+          // Add files to the request if they exist
+          if (files?.length) {
+            parsedBody.dyad_options = {
+              files,
+              enable_lazy_edits: options.dyadOptions.enableLazyEdits,
+              enable_smart_files_context:
+                options.dyadOptions.enableSmartFilesContext,
+            };
          }
-        : options.fetch,
+
+          // Return modified request with files included
+          const modifiedInit = {
+            ...init,
+            body: JSON.stringify(parsedBody),
+          };
+
+          // Use the provided fetch or default fetch
+          return (options.fetch || fetch)(input, modifiedInit);
+        } catch (e) {
+          logger.error("Error parsing request body", e);
+          // If parsing fails, use original request
+          return (options.fetch || fetch)(input, init);
+        }
+      },
    };

    return new OpenAICompatibleChatLanguageModel(modelId, restSettings, config);
--- a/src/ipc/utils/thinking_utils.ts
+++ b/src/ipc/utils/thinking_utils.ts
@@ -0,0 +1,18 @@
+import { PROVIDERS_THAT_SUPPORT_THINKING } from "../shared/language_model_helpers";
+
+export function getExtraProviderOptions(
+  providerId: string | undefined,
+): Record<string, any> {
+  if (!providerId) {
+    return {};
+  }
+  if (PROVIDERS_THAT_SUPPORT_THINKING.includes(providerId)) {
+    return {
+      thinking: {
+        type: "enabled",
+        include_thoughts: true,
+      },
+    };
+  }
+  return {};
+}