import express, { Request, Response } from "express";
import { createServer } from "http";
import cors from "cors";
import fs from "fs";
import path from "path";

// Create Express app
const app = express();
app.use(cors());
app.use(express.json());

const PORT = 3500;

// Helper function to create OpenAI-like streaming response chunks
function createStreamChunk(
  content: string,
  role: string = "assistant",
  isLast: boolean = false,
) {
  const chunk = {
    id: `chatcmpl-${Date.now()}`,
    object: "chat.completion.chunk",
    created: Math.floor(Date.now() / 1000),
    model: "fake-model",
    choices: [
      {
        index: 0,
        delta: isLast ? {} : { content, role },
        finish_reason: isLast ? "stop" : null,
      },
    ],
  };
  return `data: ${JSON.stringify(chunk)}\n\n${isLast ? "data: [DONE]\n\n" : ""}`;
}

const CANNED_MESSAGE = `
\`\`: I'll think about the problem and write a bug report.
Fake dyad write
A file (2)
More
EOM`;

app.get("/health", (req, res) => {
  res.send("OK");
});

// Ollama-specific endpoints
app.get("/ollama/api/tags", (req, res) => {
  const ollamaModels = {
    models: [
      {
        name: "testollama",
        modified_at: "2024-05-01T10:00:00.000Z",
        size: 4700000000,
        digest: "abcdef123456",
        details: {
          format: "gguf",
          family: "llama",
          families: ["llama"],
          parameter_size: "8B",
          quantization_level: "Q4_0",
        },
      },
      {
        name: "codellama:7b",
        modified_at: "2024-04-25T12:30:00.000Z",
        size: 3800000000,
        digest: "fedcba654321",
        details: {
          format: "gguf",
          family: "llama",
          families: ["llama", "codellama"],
          parameter_size: "7B",
          quantization_level: "Q5_K_M",
        },
      },
    ],
  };
  console.log("* Sending fake Ollama models");
  res.json(ollamaModels);
});

let globalCounter = 0;

app.post("/ollama/chat", (req, res) => {
  // Tell the client we're going to stream NDJSON
  res.setHeader("Content-Type", "application/x-ndjson");
  res.setHeader("Cache-Control", "no-cache");

  // Chunk #1 – partial answer
  const firstChunk = {
    model: "llama3.2",
    created_at: "2023-08-04T08:52:19.385406455-07:00",
    message: {
      role: "assistant",
      content: "ollamachunk",
      images: null,
    },
    done: false,
  };

  // Chunk #2 – final answer + metrics
  const secondChunk = {
    model: "llama3.2",
    created_at: "2023-08-04T19:22:45.499127Z",
    message: {
      role: "assistant",
      content: "",
    },
    done: true,
    total_duration: 4883583458,
    load_duration: 1334875,
    prompt_eval_count: 26,
    prompt_eval_duration: 342546000,
    eval_count: 282,
    eval_duration: 4535599000,
  };

  // Send the first object right away
  res.write(JSON.stringify(firstChunk) + "\n");
  res.write(JSON.stringify(firstChunk) + "\n");

  // …and the second one a moment later to mimic streaming
  setTimeout(() => {
    res.write(JSON.stringify(secondChunk) + "\n");
    res.end(); // Close the HTTP stream
  }, 300); // 300 ms delay – tweak as you like
});

// LM Studio specific endpoints
app.get("/lmstudio/api/v0/models", (req, res) => {
  const lmStudioModels = {
    data: [
      {
        type: "llm",
        id: "lmstudio-model-1",
        object: "model",
        publisher: "lmstudio",
        state: "loaded",
        max_context_length: 4096,
        quantization: "Q4_0",
        compatibility_type: "gguf",
        arch: "llama",
      },
      {
        type: "llm",
        id: "lmstudio-model-2-chat",
        object: "model",
        publisher: "lmstudio",
        state: "not-loaded",
        max_context_length: 8192,
        quantization: "Q5_K_M",
        compatibility_type: "gguf",
        arch: "mixtral",
      },
      {
        type: "embedding", // Should be filtered out by client
        id: "lmstudio-embedding-model",
        object: "model",
        publisher: "lmstudio",
        state: "loaded",
        max_context_length: 2048,
        quantization: "F16",
        compatibility_type: "gguf",
        arch: "bert",
      },
    ],
  };
  console.log("* Sending fake LM Studio models");
  res.json(lmStudioModels);
});
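// The chat completion handlers below recognize a few "magic" values in the last
// message (summarized here; see chatCompletionHandler for details):
//   "[429]"       -> respond with a 429 rate-limit error
//   "[dump]"      -> write the received messages to generated/<timestamp>.json
//                    and return the path as [[dyad-dump-path=...]]
//   "[increment]" -> bump a global counter and return "counter=<n>"
//   "tc=<name>"   -> return the contents of e2e-tests/fixtures/<name>.md
// Anything else returns CANNED_MESSAGE.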
app.post("/lmstudio/v1/chat/completions", chatCompletionHandler); // Handle POST requests to /v1/chat/completions app.post("/v1/chat/completions", chatCompletionHandler); function chatCompletionHandler(req: Request, res: Response) { const { stream = false, messages = [] } = req.body; console.log("* Received messages", messages); // Check if the last message contains "[429]" to simulate rate limiting const lastMessage = messages[messages.length - 1]; if (lastMessage && lastMessage.content === "[429]") { return res.status(429).json({ error: { message: "Too many requests. Please try again later.", type: "rate_limit_error", param: null, code: "rate_limit_exceeded", }, }); } let messageContent = CANNED_MESSAGE; // Check if the last message is "[dump]" to write messages to file and return path if (lastMessage && lastMessage.content === "[dump]") { const timestamp = Date.now(); const generatedDir = path.join(__dirname, "generated"); // Create generated directory if it doesn't exist if (!fs.existsSync(generatedDir)) { fs.mkdirSync(generatedDir, { recursive: true }); } const dumpFilePath = path.join(generatedDir, `${timestamp}.json`); try { fs.writeFileSync( dumpFilePath, JSON.stringify(messages, null, 2), "utf-8", ); console.log(`* Dumped messages to: ${dumpFilePath}`); messageContent = `[[dyad-dump-path=${dumpFilePath}]]`; } catch (error) { console.error(`* Error writing dump file: ${error}`); messageContent = `Error: Could not write dump file: ${error}`; } } if (lastMessage && lastMessage.content === "[increment]") { globalCounter++; messageContent = `counter=${globalCounter}`; } // Check if the last message starts with "tc=" to load test case file if ( lastMessage && lastMessage.content && lastMessage.content.startsWith("tc=") ) { const testCaseName = lastMessage.content.slice(3); // Remove "tc=" prefix const testFilePath = path.join( __dirname, "..", "..", "..", "e2e-tests", "fixtures", `${testCaseName}.md`, ); try { if (fs.existsSync(testFilePath)) { messageContent = fs.readFileSync(testFilePath, "utf-8"); console.log(`* Loaded test case: ${testCaseName}`); } else { console.log(`* Test case file not found: ${testFilePath}`); messageContent = `Error: Test case file not found: ${testCaseName}.md`; } } catch (error) { console.error(`* Error reading test case file: ${error}`); messageContent = `Error: Could not read test case file: ${testCaseName}.md`; } } // Non-streaming response if (!stream) { return res.json({ id: `chatcmpl-${Date.now()}`, object: "chat.completion", created: Math.floor(Date.now() / 1000), model: "fake-model", choices: [ { index: 0, message: { role: "assistant", content: messageContent, }, finish_reason: "stop", }, ], }); } // Streaming response res.setHeader("Content-Type", "text/event-stream"); res.setHeader("Cache-Control", "no-cache"); res.setHeader("Connection", "keep-alive"); // Split the message into characters to simulate streaming const message = messageContent; const messageChars = message.split(""); // Stream each character with a delay let index = 0; const batchSize = 8; // Send role first res.write(createStreamChunk("", "assistant")); const interval = setInterval(() => { if (index < messageChars.length) { // Get the next batch of characters (up to batchSize) const batch = messageChars.slice(index, index + batchSize).join(""); res.write(createStreamChunk(batch)); index += batchSize; } else { // Send the final chunk res.write(createStreamChunk("", "assistant", true)); clearInterval(interval); res.end(); } }, 10); } // Start the server const server = 
// Start the server
const server = createServer(app);
server.listen(PORT, () => {
  console.log(`Fake LLM server running on http://localhost:${PORT}`);
});

// Handle SIGINT (Ctrl+C)
process.on("SIGINT", () => {
  console.log("Shutting down fake LLM server");
  server.close(() => {
    console.log("Server closed");
    process.exit(0);
  });
});
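// Example usage (a sketch; how the server is started depends on the repo's tooling,
// e.g. ts-node or a compiled build):
//
//   curl http://localhost:3500/health
//   curl http://localhost:3500/ollama/api/tags
//   curl -X POST http://localhost:3500/v1/chat/completions \
//     -H "Content-Type: application/json" \
//     -d '{"stream": false, "messages": [{"role": "user", "content": "[increment]"}]}'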