diff --git a/e2e-tests/helpers/test_helper.ts b/e2e-tests/helpers/test_helper.ts
index 30e3045..4fd0438 100644
--- a/e2e-tests/helpers/test_helper.ts
+++ b/e2e-tests/helpers/test_helper.ts
@@ -73,6 +73,13 @@ class PageObject {
     await this.page.getByText("test-model").click();
   }
 
+  async selectTestOllamaModel() {
+    await this.page.getByRole("button", { name: "Model: Auto" }).click();
+    await this.page.getByText("Local models").click();
+    await this.page.getByText("Ollama", { exact: true }).click();
+    await this.page.getByText("Testollama", { exact: true }).click();
+  }
+
   async setUpTestProvider() {
     await this.page.getByText("Add custom providerConnect to").click();
     // Fill out provider dialog
@@ -201,6 +208,7 @@ export const test = base.extend<{
     const latestBuild = findLatestBuild();
     // parse the directory and find paths and other info
     const appInfo = parseElectronApp(latestBuild);
+    process.env.OLLAMA_HOST = "http://localhost:3500/ollama";
     process.env.E2E_TEST_BUILD = "true";
     // This is just a hack to avoid the AI setup screen.
     process.env.OPENAI_API_KEY = "sk-test";
diff --git a/e2e-tests/ollama.spec.ts b/e2e-tests/ollama.spec.ts
new file mode 100644
index 0000000..7a4712e
--- /dev/null
+++ b/e2e-tests/ollama.spec.ts
@@ -0,0 +1,7 @@
+import { test } from "./helpers/test_helper";
+
+test("send message to ollama", async ({ po }) => {
+  await po.selectTestOllamaModel();
+  await po.sendPrompt("hi");
+  await po.snapshotMessages();
+});
diff --git a/e2e-tests/snapshots/ollama.spec.ts_send-message-to-ollama-1.aria.yml b/e2e-tests/snapshots/ollama.spec.ts_send-message-to-ollama-1.aria.yml
new file mode 100644
index 0000000..c26e957
--- /dev/null
+++ b/e2e-tests/snapshots/ollama.spec.ts_send-message-to-ollama-1.aria.yml
@@ -0,0 +1,4 @@
+- paragraph: hi
+- paragraph: ollamachunkollamachunk
+- button "Retry":
+  - img
\ No newline at end of file
diff --git a/src/ipc/handlers/local_model_ollama_handler.ts b/src/ipc/handlers/local_model_ollama_handler.ts
index 8b9c9cf..c231ce6 100644
--- a/src/ipc/handlers/local_model_ollama_handler.ts
+++ b/src/ipc/handlers/local_model_ollama_handler.ts
@@ -4,7 +4,7 @@ import { LocalModelListResponse, LocalModel } from "../ipc_types";
 
 const logger = log.scope("ollama_handler");
 
-const OLLAMA_API_URL = "http://localhost:11434";
+const OLLAMA_API_URL = process.env.OLLAMA_HOST || "http://localhost:11434";
 
 interface OllamaModel {
   name: string;
diff --git a/src/ipc/utils/get_model_client.ts b/src/ipc/utils/get_model_client.ts
index d04f1f8..0293bc4 100644
--- a/src/ipc/utils/get_model_client.ts
+++ b/src/ipc/utils/get_model_client.ts
@@ -246,7 +246,7 @@ function getRegularModelClient(
     case "ollama": {
       // Ollama typically runs locally and doesn't require an API key in the same way
      const provider = createOllama({
-        baseURL: providerConfig.apiBaseUrl,
+        baseURL: process.env.OLLAMA_HOST,
      });
      return {
        modelClient: {
diff --git a/testing/fake-llm-server/index.ts b/testing/fake-llm-server/index.ts
index 759d5ec..b8668d7 100644
--- a/testing/fake-llm-server/index.ts
+++ b/testing/fake-llm-server/index.ts
@@ -56,8 +56,89 @@ app.get("/health", (req, res) => {
   res.send("OK");
 });
 
+// Ollama-specific endpoints
+app.get("/ollama/api/tags", (req, res) => {
+  const ollamaModels = {
+    models: [
+      {
+        name: "testollama",
+        modified_at: "2024-05-01T10:00:00.000Z",
+        size: 4700000000,
+        digest: "abcdef123456",
+        details: {
+          format: "gguf",
+          family: "llama",
+          families: ["llama"],
+          parameter_size: "8B",
+          quantization_level: "Q4_0",
+        },
+      },
+      {
+        name: "codellama:7b",
+        modified_at: "2024-04-25T12:30:00.000Z",
+        size: 3800000000,
+        digest: "fedcba654321",
+        details: {
+          format: "gguf",
+          family: "llama",
+          families: ["llama", "codellama"],
+          parameter_size: "7B",
+          quantization_level: "Q5_K_M",
+        },
+      },
+    ],
+  };
+  console.log("* Sending fake Ollama models");
+  res.json(ollamaModels);
+});
+
 let globalCounter = 0;
 
+app.post("/ollama/chat", (req, res) => {
+  // Tell the client we’re going to stream NDJSON
+  res.setHeader("Content-Type", "application/x-ndjson");
+  res.setHeader("Cache-Control", "no-cache");
+
+  // Chunk #1 – partial answer
+  const firstChunk = {
+    model: "llama3.2",
+    created_at: "2023-08-04T08:52:19.385406455-07:00",
+    message: {
+      role: "assistant",
+      content: "ollamachunk",
+      images: null,
+    },
+    done: false,
+  };
+
+  // Chunk #2 – final answer + metrics
+  const secondChunk = {
+    model: "llama3.2",
+    created_at: "2023-08-04T19:22:45.499127Z",
+    message: {
+      role: "assistant",
+      content: "",
+    },
+    done: true,
+    total_duration: 4883583458,
+    load_duration: 1334875,
+    prompt_eval_count: 26,
+    prompt_eval_duration: 342546000,
+    eval_count: 282,
+    eval_duration: 4535599000,
+  };
+
+  // Send the first object right away (twice, so the response contains two streamed chunks,
+  // matching the "ollamachunkollamachunk" snapshot)
+  res.write(JSON.stringify(firstChunk) + "\n");
+  res.write(JSON.stringify(firstChunk) + "\n");
+
+  // …and the second one a moment later to mimic streaming
+  setTimeout(() => {
+    res.write(JSON.stringify(secondChunk) + "\n");
+    res.end(); // Close the HTTP stream
+  }, 300); // 300 ms delay – tweak as you like
+});
+
 // Handle POST requests to /v1/chat/completions
 app.post("/v1/chat/completions", (req, res) => {
   const { stream = false, messages = [] } = req.body;
@@ -188,7 +269,7 @@ app.post("/v1/chat/completions", (req, res) => {
       clearInterval(interval);
       res.end();
     }
-  }, 1);
+  }, 10);
 });
 
 // Start the server