Replace thinking with native Gemini thinking summaries (#400)
This switches to Gemini's native [thinking summaries](https://cloud.google.com/vertex-ai/generative-ai/docs/thinking#thought-summaries), which were recently added to the API. Why? The grafted thinking prompt would sometimes cause weird issues where the model, especially Gemini 2.5 Flash, got confused and put dyad tags like `<dyad-write>` inside the `<think>` tags. This also improves the UX: you can see the native thoughts instead of watching the Gemini response load for a while without any feedback. I tried adding Anthropic extended thinking as well, but it requires temperature to be set to 1, which isn't ideal for Dyad's use case, where we need precise syntax following.
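For context, a minimal sketch of what consuming native thought summaries looks like with the AI SDK — the model id and prompt are placeholders, not Dyad's actual values; the diff below shows the real wiring:

```ts
import { streamText } from "ai";
import { google } from "@ai-sdk/google";

const { fullStream } = streamText({
  model: google("gemini-2.5-flash"), // placeholder model id
  providerOptions: {
    google: {
      // Ask the API to include thought summaries in the stream.
      thinkingConfig: { includeThoughts: true },
    },
  },
  prompt: "Explain your plan before answering.", // placeholder prompt
});

for await (const part of fullStream) {
  if (part.type === "reasoning") {
    // Native thought summary, streamed as its own part type.
    process.stdout.write(`[thinking] ${part.textDelta}`);
  } else if (part.type === "text-delta") {
    process.stdout.write(part.textDelta);
  }
}
```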
In the chat stream handler (`chat_stream_handlers`), import the Google provider-options type and the new helper:

```diff
@@ -33,6 +33,9 @@ import { readFile, writeFile, unlink } from "fs/promises";
 import { getMaxTokens } from "../utils/token_utils";
 import { MAX_CHAT_TURNS_IN_CONTEXT } from "@/constants/settings_constants";
 import { validateChatContext } from "../utils/context_paths_utils";
+import { GoogleGenerativeAIProviderOptions } from "@ai-sdk/google";
+
+import { getExtraProviderOptions } from "../utils/thinking_utils";
 
 const logger = log.scope("chat_stream_handlers");
 
```
Switch from `textStream` to `fullStream`, pass the thinking options through, and surface the response body in stream errors:

```diff
@@ -443,17 +446,31 @@ This conversation includes one or more image attachments. When the user uploads
   }
 
   // When calling streamText, the messages need to be properly formatted for mixed content
-  const { textStream } = streamText({
+  const { fullStream } = streamText({
     maxTokens: await getMaxTokens(settings.selectedModel),
     temperature: 0,
     maxRetries: 2,
     model: modelClient.model,
+    providerOptions: {
+      "dyad-gateway": getExtraProviderOptions(
+        modelClient.builtinProviderId,
+      ),
+      google: {
+        thinkingConfig: {
+          includeThoughts: true,
+        },
+      } satisfies GoogleGenerativeAIProviderOptions,
+    },
     system: systemPrompt,
     messages: chatMessages.filter((m) => m.content),
     onError: (error: any) => {
       logger.error("Error streaming text:", error);
-      const message =
-        (error as any)?.error?.message || JSON.stringify(error);
+      let errorMessage = (error as any)?.error?.message;
+      const responseBody = error?.error?.responseBody;
+      if (errorMessage && responseBody) {
+        errorMessage += "\n\nDetails: " + responseBody;
+      }
+      const message = errorMessage || JSON.stringify(error);
       event.sender.send(
         "chat:response:error",
         `Sorry, there was an error from the AI: ${message}`,
```
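One detail in the hunk above: `satisfies` type-checks the options literal against the SDK's `GoogleGenerativeAIProviderOptions` type without widening it, so a misspelled nested key fails at compile time instead of being silently ignored by the API. A minimal illustration (not from the diff):

```ts
import type { GoogleGenerativeAIProviderOptions } from "@ai-sdk/google";

// Compiles: the literal conforms to the provider-options type.
const ok = {
  thinkingConfig: { includeThoughts: true },
} satisfies GoogleGenerativeAIProviderOptions;

// Does not compile: the unknown (misspelled) key is caught at build time.
// const bad = {
//   thinkingConfig: { includeThouhts: true },
// } satisfies GoogleGenerativeAIProviderOptions;
```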
The stream loop now handles typed parts, wrapping reasoning in `<think>` tags and escaping dyad tags with a look-alike character:

```diff
@@ -465,10 +482,38 @@ This conversation includes one or more image attachments. When the user uploads
   });
 
   // Process the stream as before
+  let inThinkingBlock = false;
   try {
-    for await (const textPart of textStream) {
-      fullResponse += textPart;
-      fullResponse = cleanThinkingByEscapingDyadTags(fullResponse);
+    for await (const part of fullStream) {
+      let chunk = "";
+      if (part.type === "text-delta") {
+        if (inThinkingBlock) {
+          chunk = "</think>";
+          inThinkingBlock = false;
+        }
+        chunk += part.textDelta;
+      } else if (part.type === "reasoning") {
+        if (!inThinkingBlock) {
+          chunk = "<think>";
+          inThinkingBlock = true;
+        }
+        // Escape dyad tags in reasoning content.
+        // We replace the opening angle bracket with a look-alike character
+        // to avoid issues where thinking content includes dyad tags
+        // and is mishandled by:
+        // 1. FE markdown parser
+        // 2. Main process response processor
+        chunk += part.textDelta
+          .replace(/<dyad/g, "＜dyad")
+          .replace(/<\/dyad/g, "＜/dyad");
+      }
+
+      if (!chunk) {
+        continue;
+      }
+
+      fullResponse += chunk;
 
       if (
         fullResponse.includes("$$SUPABASE_CLIENT_CODE$$") &&
         updatedChat.app?.supabaseProjectId
```
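The loop above is a small two-state machine: a reasoning part opens a `<think>` block, and the first text part after it closes the block. A sketch of the same logic in isolation (hypothetical helper, not in the diff; it additionally closes a block left open at end of stream):

```ts
type StreamPart =
  | { type: "text-delta"; textDelta: string }
  | { type: "reasoning"; textDelta: string };

// Turn a stream of typed parts into plain text where reasoning
// is wrapped in <think>...</think> tags.
function* wrapReasoning(parts: Iterable<StreamPart>): Generator<string> {
  let inThinkingBlock = false;
  for (const part of parts) {
    if (part.type === "text-delta") {
      if (inThinkingBlock) {
        yield "</think>"; // close the block before normal text resumes
        inThinkingBlock = false;
      }
      yield part.textDelta;
    } else {
      if (!inThinkingBlock) {
        yield "<think>"; // open the block on the first reasoning part
        inThinkingBlock = true;
      }
      yield part.textDelta;
    }
  }
  if (inThinkingBlock) yield "</think>"; // stream ended mid-thought
}

// Example: reasoning "plan" + text "answer" => "<think>plan</think>answer"
const out = [...wrapReasoning([
  { type: "reasoning", textDelta: "plan" },
  { type: "text-delta", textDelta: "answer" },
])].join("");
```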
In `language_model_helpers` (which the new helper imports from), declare the allowlist of providers with thinking support:

```diff
@@ -6,6 +6,10 @@ import {
 import type { LanguageModelProvider, LanguageModel } from "@/ipc/ipc_types";
 import { eq } from "drizzle-orm";
 
+export const PROVIDERS_THAT_SUPPORT_THINKING: (keyof typeof MODEL_OPTIONS)[] = [
+  "google",
+];
+
 export interface ModelOption {
   name: string;
   displayName: string;
```
In `getModelClient`, thread the original provider id through to the Dyad engine and use it as the builtin provider id instead of the hard-coded `"auto"`:

```diff
@@ -116,6 +116,7 @@ export async function getModelClient(
       ? createDyadEngine({
           apiKey: dyadApiKey,
           baseURL: dyadEngineUrl ?? "https://engine.dyad.sh/v1",
+          originalProviderId: model.provider,
           dyadOptions: {
             enableLazyEdits: settings.enableProLazyEditsMode,
             enableSmartFilesContext: settings.enableProSmartFilesContextMode,
@@ -150,7 +151,7 @@ export async function getModelClient(
           }
         : undefined,
     ),
-    builtinProviderId: "auto",
+    builtinProviderId: model.provider,
   };
 
   return {
```
In `src/ipc/utils/llm_engine_provider.ts`, import the helper and add `originalProviderId` to the provider settings:

```diff
@@ -11,6 +11,7 @@ import {
 import { OpenAICompatibleChatSettings } from "@ai-sdk/openai-compatible";
 import log from "electron-log";
+import { getExtraProviderOptions } from "./thinking_utils";
 
 const logger = log.scope("llm_engine_provider");
 
@@ -42,6 +43,7 @@ or to provide a custom fetch implementation for e.g. testing.
    */
   fetch?: FetchFunction;
 
+  originalProviderId: string;
   dyadOptions: {
     enableLazyEdits?: boolean;
     enableSmartFilesContext?: boolean;
```
The custom fetch wrapper, previously installed only when files were attached, now always wraps requests so the extra provider options get merged into every outgoing body:

```diff
@@ -113,42 +115,43 @@ export function createDyadEngine(
     defaultObjectGenerationMode:
       "tool" as LanguageModelV1ObjectGenerationMode,
     // Custom fetch implementation that adds files to the request
-    fetch: files?.length
-      ? (input: RequestInfo | URL, init?: RequestInit) => {
-          // Use default fetch if no init or body
-          if (!init || !init.body || typeof init.body !== "string") {
-            return (options.fetch || fetch)(input, init);
-          }
+    fetch: (input: RequestInfo | URL, init?: RequestInit) => {
+      // Use default fetch if no init or body
+      if (!init || !init.body || typeof init.body !== "string") {
+        return (options.fetch || fetch)(input, init);
+      }
 
-          try {
-            // Parse the request body to manipulate it
-            const parsedBody = JSON.parse(init.body);
+      try {
+        // Parse the request body to manipulate it
+        const parsedBody = {
+          ...JSON.parse(init.body),
+          ...getExtraProviderOptions(options.originalProviderId),
+        };
 
-            // Add files to the request if they exist
-            if (files?.length) {
-              parsedBody.dyad_options = {
-                files,
-                enable_lazy_edits: options.dyadOptions.enableLazyEdits,
-                enable_smart_files_context:
-                  options.dyadOptions.enableSmartFilesContext,
-              };
-            }
+        // Add files to the request if they exist
+        if (files?.length) {
+          parsedBody.dyad_options = {
+            files,
+            enable_lazy_edits: options.dyadOptions.enableLazyEdits,
+            enable_smart_files_context:
+              options.dyadOptions.enableSmartFilesContext,
+          };
+        }
 
-            // Return modified request with files included
-            const modifiedInit = {
-              ...init,
-              body: JSON.stringify(parsedBody),
-            };
+        // Return modified request with files included
+        const modifiedInit = {
+          ...init,
+          body: JSON.stringify(parsedBody),
+        };
 
-            // Use the provided fetch or default fetch
-            return (options.fetch || fetch)(input, modifiedInit);
-          } catch (e) {
-            logger.error("Error parsing request body", e);
-            // If parsing fails, use original request
-            return (options.fetch || fetch)(input, init);
-          }
-        }
-      : options.fetch,
+        // Use the provided fetch or default fetch
+        return (options.fetch || fetch)(input, modifiedInit);
+      } catch (e) {
+        logger.error("Error parsing request body", e);
+        // If parsing fails, use original request
+        return (options.fetch || fetch)(input, init);
+      }
+    },
   };
 
   return new OpenAICompatibleChatLanguageModel(modelId, restSettings, config);
```
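Net effect of the merge: the extra provider options are spread into the top level of every outgoing JSON body. A hedged sketch of just that step (the body values are illustrative; the real one comes from the AI SDK):

```ts
import { getExtraProviderOptions } from "./thinking_utils";

// Illustrative request body.
const body = JSON.stringify({ model: "gemini-2.5-flash", messages: [] });

const parsedBody = {
  ...JSON.parse(body),
  ...getExtraProviderOptions("google"),
};

// parsedBody:
// {
//   model: "gemini-2.5-flash",
//   messages: [],
//   thinking: { type: "enabled", include_thoughts: true },
// }
```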
New file `src/ipc/utils/thinking_utils.ts` (18 lines):

```diff
@@ -0,0 +1,18 @@
+import { PROVIDERS_THAT_SUPPORT_THINKING } from "../shared/language_model_helpers";
+
+export function getExtraProviderOptions(
+  providerId: string | undefined,
+): Record<string, any> {
+  if (!providerId) {
+    return {};
+  }
+  if (PROVIDERS_THAT_SUPPORT_THINKING.includes(providerId)) {
+    return {
+      thinking: {
+        type: "enabled",
+        include_thoughts: true,
+      },
+    };
+  }
+  return {};
+}
```
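Usage follows directly from the code above — allowlisted providers get the thinking payload, everything else gets an empty object that spreads to a no-op:

```ts
import { getExtraProviderOptions } from "./thinking_utils";

getExtraProviderOptions("google");
// => { thinking: { type: "enabled", include_thoughts: true } }

getExtraProviderOptions("openai"); // => {}
getExtraProviderOptions(undefined); // => {}
```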
Finally, drop the grafted thinking instructions from the system prompt now that thinking comes from the API:

```diff
@@ -338,8 +338,6 @@ Do not hesitate to extensively use console logs to follow the flow of the code.
 DO NOT OVERENGINEER THE CODE. You take great pride in keeping things simple and elegant. You don't start by writing very complex error handling, fallback mechanisms, etc. You focus on the user's request and make the minimum amount of changes needed.
 DON'T DO MORE THAN WHAT THE USER ASKS FOR.
 
-${THINKING_PROMPT}
-
 [[AI_RULES]]
 
 Directory names MUST be all lower-case (src/pages, src/components, etc.). File names may use mixed-case if you like.
```