Implement saver mode (#154)

This commit is contained in:
Will Chen
2025-05-13 15:34:41 -07:00
committed by GitHub
parent 3763423dc7
commit 069c221292
11 changed files with 1630 additions and 15 deletions

90
testing/fake-llm-server/dist/index.js vendored Normal file
View File

@@ -0,0 +1,90 @@
"use strict";
var __importDefault =
(this && this.__importDefault) ||
function (mod) {
return mod && mod.__esModule ? mod : { default: mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const express_1 = __importDefault(require("express"));
const http_1 = require("http");
const cors_1 = __importDefault(require("cors"));
// Create Express app
const app = (0, express_1.default)();
app.use((0, cors_1.default)());
app.use(express_1.default.json());
const PORT = 3500;
// Helper function to create OpenAI-like streaming response chunks
function createStreamChunk(content, role = "assistant", isLast = false) {
const chunk = {
id: `chatcmpl-${Date.now()}`,
object: "chat.completion.chunk",
created: Math.floor(Date.now() / 1000),
model: "fake-model",
choices: [
{
index: 0,
delta: isLast ? {} : { content, role },
finish_reason: isLast ? "stop" : null,
},
],
};
return `data: ${JSON.stringify(chunk)}\n\n${isLast ? "data: [DONE]\n\n" : ""}`;
}
// Handle POST requests to /v1/chat/completions
app.post("/v1/chat/completions", (req, res) => {
const { stream = false } = req.body;
// Non-streaming response
if (!stream) {
return res.json({
id: `chatcmpl-${Date.now()}`,
object: "chat.completion",
created: Math.floor(Date.now() / 1000),
model: "fake-model",
choices: [
{
index: 0,
message: {
role: "assistant",
content: "hello world",
},
finish_reason: "stop",
},
],
});
}
// Streaming response
res.setHeader("Content-Type", "text/event-stream");
res.setHeader("Cache-Control", "no-cache");
res.setHeader("Connection", "keep-alive");
// Split the "hello world" message into characters to simulate streaming
const message = "hello world";
const messageChars = message.split("");
// Stream each character with a delay
let index = 0;
// Send role first
res.write(createStreamChunk("", "assistant"));
const interval = setInterval(() => {
if (index < messageChars.length) {
res.write(createStreamChunk(messageChars[index]));
index++;
} else {
// Send the final chunk
res.write(createStreamChunk("", "assistant", true));
clearInterval(interval);
res.end();
}
}, 100);
});
// Start the server
const server = (0, http_1.createServer)(app);
server.listen(PORT, () => {
console.log(`Fake LLM server running on http://localhost:${PORT}`);
});
// Handle SIGINT (Ctrl+C)
process.on("SIGINT", () => {
console.log("Shutting down fake LLM server");
server.close(() => {
console.log("Server closed");
process.exit(0);
});
});