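// Fake LLM server used by the e2e tests. It impersonates an
// OpenAI-compatible chat completions API and a minimal Ollama API,
// returning canned responses so tests can run without a real model.
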
import express from "express";
import { createServer } from "http";
import cors from "cors";
import fs from "fs";
import path from "path";

// Create Express app
const app = express();
app.use(cors());
app.use(express.json());

const PORT = 3500;

// Helper function to create OpenAI-like streaming response chunks
function createStreamChunk(
  content: string,
  role: string = "assistant",
  isLast: boolean = false,
) {
  const chunk = {
    id: `chatcmpl-${Date.now()}`,
    object: "chat.completion.chunk",
    created: Math.floor(Date.now() / 1000),
    model: "fake-model",
    choices: [
      {
        index: 0,
        delta: isLast ? {} : { content, role },
        finish_reason: isLast ? "stop" : null,
      },
    ],
  };

  // SSE framing: each chunk is a "data:" line; the final chunk is followed
  // by the "data: [DONE]" sentinel that OpenAI streaming clients expect.
  return `data: ${JSON.stringify(chunk)}\n\n${isLast ? "data: [DONE]\n\n" : ""}`;
}

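// Canned assistant reply. It contains <dyad-write> tags both inside and
// outside a <think> block, so tests can exercise tag parsing inside
// versus outside "thinking" output.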
const CANNED_MESSAGE = `
<think>
\`<dyad-write>\`:
I'll think about the problem and write a bug report.

<dyad-write>

<dyad-write path="file1.txt">
Fake dyad write
</dyad-write>
</think>

<dyad-write path="file1.txt">
A file (2)
</dyad-write>
More
EOM`;

app.get("/health", (req, res) => {
|
||
res.send("OK");
|
||
});
|
||
|
||
// Ollama-specific endpoints
// Mimics Ollama's /api/tags model listing, returning two fake local models
app.get("/ollama/api/tags", (req, res) => {
  const ollamaModels = {
    models: [
      {
        name: "testollama",
        modified_at: "2024-05-01T10:00:00.000Z",
        size: 4700000000,
        digest: "abcdef123456",
        details: {
          format: "gguf",
          family: "llama",
          families: ["llama"],
          parameter_size: "8B",
          quantization_level: "Q4_0",
        },
      },
      {
        name: "codellama:7b",
        modified_at: "2024-04-25T12:30:00.000Z",
        size: 3800000000,
        digest: "fedcba654321",
        details: {
          format: "gguf",
          family: "llama",
          families: ["llama", "codellama"],
          parameter_size: "7B",
          quantization_level: "Q5_K_M",
        },
      },
    ],
  };
  console.log("* Sending fake Ollama models");
  res.json(ollamaModels);
});

// Shared counter, bumped by the "[increment]" test message below
let globalCounter = 0;

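// Mimics Ollama's streaming chat endpoint: emits two NDJSON chunks, the
// second after a short delay, then closes the stream.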
app.post("/ollama/chat", (req, res) => {
|
||
// Tell the client we’re going to stream NDJSON
|
||
res.setHeader("Content-Type", "application/x-ndjson");
|
||
res.setHeader("Cache-Control", "no-cache");
|
||
|
||
// Chunk #1 – partial answer
|
||
const firstChunk = {
|
||
model: "llama3.2",
|
||
created_at: "2023-08-04T08:52:19.385406455-07:00",
|
||
message: {
|
||
role: "assistant",
|
||
content: "ollamachunk",
|
||
images: null,
|
||
},
|
||
done: false,
|
||
};
|
||
|
||
// Chunk #2 – final answer + metrics
|
||
const secondChunk = {
|
||
model: "llama3.2",
|
||
created_at: "2023-08-04T19:22:45.499127Z",
|
||
message: {
|
||
role: "assistant",
|
||
content: "",
|
||
},
|
||
done: true,
|
||
total_duration: 4883583458,
|
||
load_duration: 1334875,
|
||
prompt_eval_count: 26,
|
||
prompt_eval_duration: 342546000,
|
||
eval_count: 282,
|
||
eval_duration: 4535599000,
|
||
};
|
||
|
||
// Send the first object right away
|
||
res.write(JSON.stringify(firstChunk) + "\n");

  // …and the second one a moment later to mimic streaming
  setTimeout(() => {
    res.write(JSON.stringify(secondChunk) + "\n");
    res.end(); // Close the HTTP stream
  }, 300); // 300 ms delay – tweak as you like
});
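
// A quick manual test (a sketch: the flags are standard curl, the body
// shape follows Ollama's chat API, and this handler ignores the body):
//
//   curl -N http://localhost:3500/ollama/chat \
//     -H "Content-Type: application/json" \
//     -d '{"model": "testollama", "messages": [{"role": "user", "content": "hi"}]}'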
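
// OpenAI-compatible chat completions endpoint. The content of the last
// message selects special test behaviors:
//   "[429]"       -> return a rate-limit error
//   "[dump]"      -> write the received messages to a JSON file and
//                    reply with its path
//   "[increment]" -> bump and report the shared counter
//   "tc=<name>"   -> reply with the e2e-tests/fixtures/<name>.md fixture
// Any other input gets CANNED_MESSAGE.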
// Handle POST requests to /v1/chat/completions
app.post("/v1/chat/completions", (req, res) => {
  const { stream = false, messages = [] } = req.body;
  console.log("* Received messages", messages);

  // Check if the last message is exactly "[429]" to simulate rate limiting
  const lastMessage = messages[messages.length - 1];
  if (lastMessage && lastMessage.content === "[429]") {
    return res.status(429).json({
      error: {
        message: "Too many requests. Please try again later.",
        type: "rate_limit_error",
        param: null,
        code: "rate_limit_exceeded",
      },
    });
  }

  let messageContent = CANNED_MESSAGE;

  // Check if the last message is "[dump]" to write messages to file and return path
  if (lastMessage && lastMessage.content === "[dump]") {
    const timestamp = Date.now();
    const generatedDir = path.join(__dirname, "generated");

    // Create generated directory if it doesn't exist
    if (!fs.existsSync(generatedDir)) {
      fs.mkdirSync(generatedDir, { recursive: true });
    }

    const dumpFilePath = path.join(generatedDir, `${timestamp}.json`);

    try {
      fs.writeFileSync(
        dumpFilePath,
        JSON.stringify(messages, null, 2),
        "utf-8",
      );
      console.log(`* Dumped messages to: ${dumpFilePath}`);
      messageContent = `[[dyad-dump-path=${dumpFilePath}]]`;
    } catch (error) {
      console.error(`* Error writing dump file: ${error}`);
      messageContent = `Error: Could not write dump file: ${error}`;
    }
  }

  if (lastMessage && lastMessage.content === "[increment]") {
    globalCounter++;
    messageContent = `counter=${globalCounter}`;
  }

  // Check if the last message starts with "tc=" to load a test case file
  if (
    lastMessage &&
    lastMessage.content &&
    lastMessage.content.startsWith("tc=")
  ) {
    const testCaseName = lastMessage.content.slice(3); // Remove "tc=" prefix
    const testFilePath = path.join(
      __dirname,
      "..",
      "..",
      "..",
      "e2e-tests",
      "fixtures",
      `${testCaseName}.md`,
    );

    try {
      if (fs.existsSync(testFilePath)) {
        messageContent = fs.readFileSync(testFilePath, "utf-8");
        console.log(`* Loaded test case: ${testCaseName}`);
      } else {
        console.log(`* Test case file not found: ${testFilePath}`);
        messageContent = `Error: Test case file not found: ${testCaseName}.md`;
      }
    } catch (error) {
      console.error(`* Error reading test case file: ${error}`);
      messageContent = `Error: Could not read test case file: ${testCaseName}.md`;
    }
  }

  // Non-streaming response
  if (!stream) {
    return res.json({
      id: `chatcmpl-${Date.now()}`,
      object: "chat.completion",
      created: Math.floor(Date.now() / 1000),
      model: "fake-model",
      choices: [
        {
          index: 0,
          message: {
            role: "assistant",
            content: messageContent,
          },
          finish_reason: "stop",
        },
      ],
    });
  }

  // Streaming response (server-sent events)
  res.setHeader("Content-Type", "text/event-stream");
  res.setHeader("Cache-Control", "no-cache");
  res.setHeader("Connection", "keep-alive");

  // Split the message into characters to simulate streaming
  const message = messageContent;
  const messageChars = message.split("");

  // Stream the message in batches of `batchSize` characters with a delay
  let index = 0;
  const batchSize = 8;

  // Send role first
  res.write(createStreamChunk("", "assistant"));

  const interval = setInterval(() => {
    if (index < messageChars.length) {
      // Get the next batch of characters (up to batchSize)
      const batch = messageChars.slice(index, index + batchSize).join("");
      res.write(createStreamChunk(batch));
      index += batchSize;
    } else {
      // Send the final chunk and the [DONE] sentinel, then close the stream
      res.write(createStreamChunk("", "assistant", true));
      clearInterval(interval);
      res.end();
    }
  }, 10);
});
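
// A minimal sketch of how a client could hit this endpoint (the OpenAI SDK
// configuration below is an assumption for illustration, not part of this
// server; any OpenAI-compatible client pointed at /v1 should work):
//
//   import OpenAI from "openai";
//   const client = new OpenAI({
//     baseURL: "http://localhost:3500/v1",
//     apiKey: "fake-key", // ignored by this server
//   });
//   const completion = await client.chat.completions.create({
//     model: "fake-model",
//     messages: [{ role: "user", content: "[increment]" }],
//   });
//   // completion.choices[0].message.content === "counter=1" on the first call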

// Start the server
const server = createServer(app);
server.listen(PORT, () => {
  console.log(`Fake LLM server running on http://localhost:${PORT}`);
});

// Handle SIGINT (Ctrl+C)
process.on("SIGINT", () => {
  console.log("Shutting down fake LLM server");
  server.close(() => {
    console.log("Server closed");
    process.exit(0);
  });
});