137 lines
3.2 KiB
TypeScript
137 lines
3.2 KiB
TypeScript
import express from "express";
|
|
import { createServer } from "http";
|
|
import cors from "cors";
|
|
|
|
// Create Express app
|
|
const app = express();
|
|
app.use(cors());
|
|
app.use(express.json());
|
|
|
|
const PORT = 3500;
|
|
|
|
// Helper function to create OpenAI-like streaming response chunks
|
|
function createStreamChunk(
|
|
content: string,
|
|
role: string = "assistant",
|
|
isLast: boolean = false,
|
|
) {
|
|
const chunk = {
|
|
id: `chatcmpl-${Date.now()}`,
|
|
object: "chat.completion.chunk",
|
|
created: Math.floor(Date.now() / 1000),
|
|
model: "fake-model",
|
|
choices: [
|
|
{
|
|
index: 0,
|
|
delta: isLast ? {} : { content, role },
|
|
finish_reason: isLast ? "stop" : null,
|
|
},
|
|
],
|
|
};
|
|
|
|
return `data: ${JSON.stringify(chunk)}\n\n${isLast ? "data: [DONE]\n\n" : ""}`;
|
|
}
|
|
|
|
const CANNED_MESSAGE = `
|
|
<think>
|
|
\`<dyad-write>\`:
|
|
I'll think about the problem and write a bug report.
|
|
|
|
<dyad-write>
|
|
|
|
<dyad-write path="file1.txt">
|
|
Fake dyad write
|
|
</dyad-write>
|
|
</think>
|
|
|
|
<dyad-write path="file1.txt">
|
|
A file (2)
|
|
</dyad-write>
|
|
More
|
|
EOM`;
|
|
|
|
app.get("/health", (req, res) => {
|
|
res.send("OK");
|
|
});
|
|
|
|
// Handle POST requests to /v1/chat/completions
|
|
app.post("/v1/chat/completions", (req, res) => {
|
|
const { stream = false, messages = [] } = req.body;
|
|
console.log("* Received messages", messages);
|
|
|
|
// Check if the last message contains "[429]" to simulate rate limiting
|
|
const lastMessage = messages[messages.length - 1];
|
|
if (lastMessage && lastMessage.content === "[429]") {
|
|
return res.status(429).json({
|
|
error: {
|
|
message: "Too many requests. Please try again later.",
|
|
type: "rate_limit_error",
|
|
param: null,
|
|
code: "rate_limit_exceeded",
|
|
},
|
|
});
|
|
}
|
|
|
|
// Non-streaming response
|
|
if (!stream) {
|
|
return res.json({
|
|
id: `chatcmpl-${Date.now()}`,
|
|
object: "chat.completion",
|
|
created: Math.floor(Date.now() / 1000),
|
|
model: "fake-model",
|
|
choices: [
|
|
{
|
|
index: 0,
|
|
message: {
|
|
role: "assistant",
|
|
content: CANNED_MESSAGE,
|
|
},
|
|
finish_reason: "stop",
|
|
},
|
|
],
|
|
});
|
|
}
|
|
|
|
// Streaming response
|
|
res.setHeader("Content-Type", "text/event-stream");
|
|
res.setHeader("Cache-Control", "no-cache");
|
|
res.setHeader("Connection", "keep-alive");
|
|
|
|
// Split the "hello world" message into characters to simulate streaming
|
|
const message = CANNED_MESSAGE;
|
|
const messageChars = message.split("");
|
|
|
|
// Stream each character with a delay
|
|
let index = 0;
|
|
|
|
// Send role first
|
|
res.write(createStreamChunk("", "assistant"));
|
|
|
|
const interval = setInterval(() => {
|
|
if (index < messageChars.length) {
|
|
res.write(createStreamChunk(messageChars[index]));
|
|
index++;
|
|
} else {
|
|
// Send the final chunk
|
|
res.write(createStreamChunk("", "assistant", true));
|
|
clearInterval(interval);
|
|
res.end();
|
|
}
|
|
}, 10);
|
|
});
|
|
|
|
// Start the server
|
|
const server = createServer(app);
|
|
server.listen(PORT, () => {
|
|
console.log(`Fake LLM server running on http://localhost:${PORT}`);
|
|
});
|
|
|
|
// Handle SIGINT (Ctrl+C)
|
|
process.on("SIGINT", () => {
|
|
console.log("Shutting down fake LLM server");
|
|
server.close(() => {
|
|
console.log("Server closed");
|
|
process.exit(0);
|
|
});
|
|
});
|