import express, { Request, Response } from "express";
import { createServer } from "http";
import cors from "cors";
import fs from "fs";
import path from "path";
// Create Express app
const app = express();
app.use(cors());
app.use(express.json());
const PORT = 3500;
// Helper function to create OpenAI-like streaming response chunks
function createStreamChunk(
content: string,
role: string = "assistant",
isLast: boolean = false,
) {
const chunk = {
id: `chatcmpl-${Date.now()}`,
object: "chat.completion.chunk",
created: Math.floor(Date.now() / 1000),
model: "fake-model",
choices: [
{
index: 0,
delta: isLast ? {} : { content, role },
finish_reason: isLast ? "stop" : null,
},
],
};
return `data: ${JSON.stringify(chunk)}\n\n${isLast ? "data: [DONE]\n\n" : ""}`;
}
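// Example output (id/created vary per call): createStreamChunk("Hi") yields
//   data: {"id":"chatcmpl-...","object":"chat.completion.chunk","created":...,"model":"fake-model","choices":[{"index":0,"delta":{"content":"Hi","role":"assistant"},"finish_reason":null}]}
// followed by a blank line; when isLast is true it also appends "data: [DONE]".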
const CANNED_MESSAGE = `
\`\`:
I'll think about the problem and write a bug report.
Fake dyad write
A file (2)
More
EOM`;
app.get("/health", (req, res) => {
res.send("OK");
});
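// e.g. `curl http://localhost:3500/health` prints "OK"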
// Ollama-specific endpoints
app.get("/ollama/api/tags", (req, res) => {
const ollamaModels = {
models: [
{
name: "testollama",
modified_at: "2024-05-01T10:00:00.000Z",
size: 4700000000,
digest: "abcdef123456",
details: {
format: "gguf",
family: "llama",
families: ["llama"],
parameter_size: "8B",
quantization_level: "Q4_0",
},
},
{
name: "codellama:7b",
modified_at: "2024-04-25T12:30:00.000Z",
size: 3800000000,
digest: "fedcba654321",
details: {
format: "gguf",
family: "llama",
families: ["llama", "codellama"],
parameter_size: "7B",
quantization_level: "Q5_K_M",
},
},
],
};
console.log("* Sending fake Ollama models");
res.json(ollamaModels);
});
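// Module-level counter used by the "[increment]" trigger in chatCompletionHandler below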
let globalCounter = 0;
app.post("/ollama/chat", (req, res) => {
// Tell the client we're going to stream NDJSON
res.setHeader("Content-Type", "application/x-ndjson");
res.setHeader("Cache-Control", "no-cache");
// Chunk #1 – partial answer
const firstChunk = {
model: "llama3.2",
created_at: "2023-08-04T08:52:19.385406455-07:00",
message: {
role: "assistant",
content: "ollamachunk",
images: null,
},
done: false,
};
// Chunk #2 – final answer + metrics
const secondChunk = {
model: "llama3.2",
created_at: "2023-08-04T19:22:45.499127Z",
message: {
role: "assistant",
content: "",
},
done: true,
total_duration: 4883583458,
load_duration: 1334875,
prompt_eval_count: 26,
prompt_eval_duration: 342546000,
eval_count: 282,
eval_duration: 4535599000,
};
// Send the first object right away (written twice, so the client sees two "ollamachunk" deltas)
res.write(JSON.stringify(firstChunk) + "\n");
res.write(JSON.stringify(firstChunk) + "\n");
// …and the second one a moment later to mimic streaming
setTimeout(() => {
res.write(JSON.stringify(secondChunk) + "\n");
res.end(); // Close the HTTP stream
}, 300); // 300 ms delay – tweak as you like
});
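// (A consumer treats the body as NDJSON: JSON.parse each line, appending
// message.content until a chunk with done: true arrives along with the metrics.)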
// LM Studio-specific endpoints
app.get("/lmstudio/api/v0/models", (req, res) => {
const lmStudioModels = {
data: [
{
type: "llm",
id: "lmstudio-model-1",
object: "model",
publisher: "lmstudio",
state: "loaded",
max_context_length: 4096,
quantization: "Q4_0",
compatibility_type: "gguf",
arch: "llama",
},
{
type: "llm",
id: "lmstudio-model-2-chat",
object: "model",
publisher: "lmstudio",
state: "not-loaded",
max_context_length: 8192,
quantization: "Q5_K_M",
compatibility_type: "gguf",
arch: "mixtral",
},
{
type: "embedding", // Should be filtered out by client
id: "lmstudio-embedding-model",
object: "model",
publisher: "lmstudio",
state: "loaded",
max_context_length: 2048,
quantization: "F16",
compatibility_type: "gguf",
arch: "bert",
},
],
};
console.log("* Sending fake LM Studio models");
res.json(lmStudioModels);
});
app.post("/lmstudio/v1/chat/completions", chatCompletionHandler);
// Handle POST requests to /v1/chat/completions
app.post("/v1/chat/completions", chatCompletionHandler);
function chatCompletionHandler(req: Request, res: Response) {
const { stream = false, messages = [] } = req.body;
console.log("* Received messages", messages);
// Check if the last message is "[429]" to simulate rate limiting
const lastMessage = messages[messages.length - 1];
if (lastMessage && lastMessage.content === "[429]") {
return res.status(429).json({
error: {
message: "Too many requests. Please try again later.",
type: "rate_limit_error",
param: null,
code: "rate_limit_exceeded",
},
});
}
let messageContent = CANNED_MESSAGE;
// Check if the last message is "[dump]" to write messages to file and return path
if (lastMessage && lastMessage.content === "[dump]") {
const timestamp = Date.now();
const generatedDir = path.join(__dirname, "generated");
// Create generated directory if it doesn't exist
if (!fs.existsSync(generatedDir)) {
fs.mkdirSync(generatedDir, { recursive: true });
}
const dumpFilePath = path.join(generatedDir, `${timestamp}.json`);
try {
fs.writeFileSync(
dumpFilePath,
JSON.stringify(messages, null, 2),
"utf-8",
);
console.log(`* Dumped messages to: ${dumpFilePath}`);
messageContent = `[[dyad-dump-path=${dumpFilePath}]]`;
} catch (error) {
console.error(`* Error writing dump file: ${error}`);
messageContent = `Error: Could not write dump file: ${error}`;
}
}
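// Check if the last message is "[increment]" to bump and return the global counter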
if (lastMessage && lastMessage.content === "[increment]") {
globalCounter++;
messageContent = `counter=${globalCounter}`;
}
// Check if the last message starts with "tc=" to load a test case fixture file
if (
lastMessage &&
lastMessage.content &&
lastMessage.content.startsWith("tc=")
) {
const testCaseName = lastMessage.content.slice(3); // Remove "tc=" prefix
const testFilePath = path.join(
__dirname,
"..",
"..",
"..",
"e2e-tests",
"fixtures",
`${testCaseName}.md`,
);
try {
if (fs.existsSync(testFilePath)) {
messageContent = fs.readFileSync(testFilePath, "utf-8");
console.log(`* Loaded test case: ${testCaseName}`);
} else {
console.log(`* Test case file not found: ${testFilePath}`);
messageContent = `Error: Test case file not found: ${testCaseName}.md`;
}
} catch (error) {
console.error(`* Error reading test case file: ${error}`);
messageContent = `Error: Could not read test case file: ${testCaseName}.md`;
}
}
// Non-streaming response
if (!stream) {
return res.json({
id: `chatcmpl-${Date.now()}`,
object: "chat.completion",
created: Math.floor(Date.now() / 1000),
model: "fake-model",
choices: [
{
index: 0,
message: {
role: "assistant",
content: messageContent,
},
finish_reason: "stop",
},
],
});
}
// Streaming response
res.setHeader("Content-Type", "text/event-stream");
res.setHeader("Cache-Control", "no-cache");
res.setHeader("Connection", "keep-alive");
// Split the message into characters to simulate streaming
const messageChars = messageContent.split("");
// Stream each character with a delay
let index = 0;
const batchSize = 8;
// Send an initial empty-content chunk that carries the assistant role
res.write(createStreamChunk("", "assistant"));
const interval = setInterval(() => {
if (index < messageChars.length) {
// Get the next batch of characters (up to batchSize)
const batch = messageChars.slice(index, index + batchSize).join("");
res.write(createStreamChunk(batch));
index += batchSize;
} else {
// Send the final chunk
res.write(createStreamChunk("", "assistant", true));
clearInterval(interval);
res.end();
}
}, 10);
}
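// Example of exercising the streaming endpoint manually ("some-fixture" is a
// placeholder for a file under e2e-tests/fixtures/):
//   curl -N http://localhost:3500/v1/chat/completions \
//     -H "Content-Type: application/json" \
//     -d '{"stream": true, "messages": [{"role": "user", "content": "tc=some-fixture"}]}'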
// Start the server
const server = createServer(app);
server.listen(PORT, () => {
console.log(`Fake LLM server running on http://localhost:${PORT}`);
});
// Handle SIGINT (Ctrl+C)
process.on("SIGINT", () => {
console.log("Shutting down fake LLM server");
server.close(() => {
console.log("Server closed");
process.exit(0);
});
});