Optimize codebase extract & fix proposal handler token counting logic (#36)

* remove deprecated script
* Optimize codebase extract & fix proposal handler token counting logic
* add caching + lock
* sort by modified timestamp
* cache cleanup
Will Chen, 2025-04-28 21:39:16 -07:00 (committed by GitHub)
parent 813f170c68
commit 7d5595f630
3 changed files with 373 additions and 245 deletions
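
The second file's changes wrap the get-proposal handler in withLock (imported from lock_utils), which is not itself part of this diff. For orientation, here is a minimal, hypothetical TypeScript sketch of a per-key async lock with the behavior the handler appears to rely on; the helper name and details are illustrative, not the repository's actual implementation:

// Hypothetical sketch only: lock_utils is not shown in this commit.
// Serializes async work per key so concurrent calls with the same key run one at a time.
const locks = new Map<string, Promise<unknown>>();

export async function withLock<T>(key: string, fn: () => Promise<T>): Promise<T> {
  // Queue fn behind whatever is already running for this key (or nothing).
  const previous = locks.get(key) ?? Promise.resolve();
  const run = previous.catch(() => {}).then(fn);
  // Track a never-rejecting tail so one failure does not poison the chain.
  const tail = run.catch(() => {});
  locks.set(key, tail);
  try {
    return await run;
  } finally {
    // Drop the entry once no newer caller has queued behind this one.
    if (locks.get(key) === tail) {
      locks.delete(key);
    }
  }
}

Combined with the new in-memory token-count cache, the intent appears to be that two overlapping get-proposal calls for the same chatId run sequentially, so the codebase is extracted and token-counted once and the second call hits the cache.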

View File

@@ -1,116 +0,0 @@
#!/usr/bin/env node
// Add type module declaration at the top
// @ts-check
// @ts-ignore
// eslint-disable-next-line
// @ts-nocheck
import fs from "fs";
import path from "path";
import { fileURLToPath } from "url";
import { dirname } from "path";
import { isIgnored } from "isomorphic-git";
import log from "electron-log";
const logger = log.scope("extract-codebase");
// File extensions to include
const ALLOWED_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".css"];
// Function to check if a path is ignored by gitignore
async function isGitIgnored(
  filePath: string,
  baseDir: string
): Promise<boolean> {
  try {
    const relativePath = path.relative(baseDir, filePath);
    return await isIgnored({ fs, dir: baseDir, filepath: relativePath });
  } catch (error) {
    logger.error(`Error checking if path is git ignored: ${filePath}`, error);
    return false;
  }
}
// Function to recursively walk a directory
async function walkDirectory(dir: string, baseDir: string): Promise<string[]> {
  const files: string[] = [];
  // Read directory contents
  const entries = fs.readdirSync(dir, { withFileTypes: true });
  for (const entry of entries) {
    const fullPath = path.join(dir, entry.name);
    // Skip if the entry is git ignored
    if (await isGitIgnored(fullPath, baseDir)) {
      continue;
    }
    if (entry.isDirectory()) {
      // Recursively process subdirectories
      const subDirFiles = await walkDirectory(fullPath, baseDir);
      files.push(...subDirFiles);
    } else if (entry.isFile()) {
      // Check file extension
      const ext = path.extname(entry.name).toLowerCase();
      if (ALLOWED_EXTENSIONS.includes(ext)) {
        files.push(fullPath);
      }
    }
  }
  return files;
}
// Function to read file contents and format for LLM consumption
function formatFile(filePath: string, baseDir: string): string {
  try {
    const relativePath = path.relative(baseDir, filePath);
    // Check if the file is in components/ui directory
    if (
      relativePath.includes("eslint.config") ||
      relativePath.includes("components/ui") ||
      relativePath.includes("components\\ui")
    ) {
      return `## File: ${relativePath}\n\n`;
    }
    const content = fs.readFileSync(filePath, "utf-8");
    return `## File: ${relativePath}\n\`\`\`${path
      .extname(filePath)
      .substring(1)}\n${content}\n\`\`\`\n\n`;
  } catch (error) {
    logger.error(`Error reading file: ${filePath}`, error);
    return `## File: ${filePath}\nError reading file: ${error}\n\n`;
  }
}
async function main() {
  // Get command line arguments
  const args = process.argv.slice(2);
  const scaffoldDir = args[0] || process.cwd();
  const outputFile = args[1] || "codebase-extract.md";
  console.log(`Extracting code from: ${scaffoldDir}`);
  console.log(`Output will be written to: ${outputFile}`);
  // Walk directory and get all files
  const files = await walkDirectory(scaffoldDir, scaffoldDir);
  console.log(`Found ${files.length} code files`);
  // Format files
  let output = `# Codebase Extract\nGenerated on: ${new Date().toISOString()}\nTotal files: ${
    files.length
  }\n\n`;
  for (const file of files) {
    output += formatFile(file, scaffoldDir);
  }
  // Write to output file
  fs.writeFileSync(outputFile, output);
  console.log(`Extraction complete. Output written to ${outputFile}`);
}

View File

@@ -22,8 +22,14 @@ import {
} from "../processors/response_processor"; } from "../processors/response_processor";
import log from "electron-log"; import log from "electron-log";
import { isServerFunction } from "../../supabase_admin/supabase_utils"; import { isServerFunction } from "../../supabase_admin/supabase_utils";
import { estimateMessagesTokens, getContextWindow } from "../utils/token_utils"; import {
estimateMessagesTokens,
estimateTokens,
getContextWindow,
} from "../utils/token_utils";
import { extractCodebase } from "../../utils/codebase";
import { getDyadAppPath } from "../../paths/paths";
import { withLock } from "../utils/lock_utils";
const logger = log.scope("proposal_handlers"); const logger = log.scope("proposal_handlers");
// Placeholder Proposal data (can be removed or kept for reference) // Placeholder Proposal data (can be removed or kept for reference)
@@ -44,41 +50,204 @@ function isParsedProposal(obj: any): obj is ParsedProposal {
   );
 }
+// Cache for codebase token counts
+interface CodebaseTokenCache {
+  chatId: number;
+  messageId: number;
+  messageContent: string;
+  tokenCount: number;
+  timestamp: number;
+}
+// Cache expiration time (5 minutes)
+const CACHE_EXPIRATION_MS = 5 * 60 * 1000;
+// In-memory cache for codebase token counts
+const codebaseTokenCache = new Map<number, CodebaseTokenCache>();
+// Function to clean up expired cache entries
+function cleanupExpiredCacheEntries() {
+  const now = Date.now();
+  let expiredCount = 0;
+  codebaseTokenCache.forEach((entry, key) => {
+    if (now - entry.timestamp > CACHE_EXPIRATION_MS) {
+      codebaseTokenCache.delete(key);
+      expiredCount++;
+    }
+  });
+  if (expiredCount > 0) {
+    logger.log(
+      `Cleaned up ${expiredCount} expired codebase token cache entries`
+    );
+  }
+}
+// Function to get cached token count or calculate and cache it
+async function getCodebaseTokenCount(
+  chatId: number,
+  messageId: number,
+  messageContent: string,
+  appPath: string
+): Promise<number> {
+  // Clean up expired cache entries first
+  cleanupExpiredCacheEntries();
+  const cacheEntry = codebaseTokenCache.get(chatId);
+  const now = Date.now();
+  // Check if cache is valid - same chat, message and content, and not expired
+  if (
+    cacheEntry &&
+    cacheEntry.messageId === messageId &&
+    cacheEntry.messageContent === messageContent &&
+    now - cacheEntry.timestamp < CACHE_EXPIRATION_MS
+  ) {
+    logger.log(`Using cached codebase token count for chatId: ${chatId}`);
+    return cacheEntry.tokenCount;
+  }
+  // Calculate and cache the token count
+  logger.log(`Calculating codebase token count for chatId: ${chatId}`);
+  const codebase = await extractCodebase(getDyadAppPath(appPath));
+  const tokenCount = estimateTokens(codebase);
+  // Store in cache
+  codebaseTokenCache.set(chatId, {
+    chatId,
+    messageId,
+    messageContent,
+    tokenCount,
+    timestamp: now,
+  });
+  return tokenCount;
+}
 const getProposalHandler = async (
   _event: IpcMainInvokeEvent,
   { chatId }: { chatId: number }
 ): Promise<ProposalResult | null> => {
-  logger.log(`IPC: get-proposal called for chatId: ${chatId}`);
+  return withLock("get-proposal:" + chatId, async () => {
+    logger.log(`IPC: get-proposal called for chatId: ${chatId}`);
     try {
       // Find the latest ASSISTANT message for the chat
       const latestAssistantMessage = await db.query.messages.findFirst({
         where: and(eq(messages.chatId, chatId), eq(messages.role, "assistant")),
         orderBy: [desc(messages.createdAt)],
         columns: {
           id: true, // Fetch the ID
           content: true, // Fetch the content to parse
           approvalState: true,
         },
       });
       if (
-      latestAssistantMessage?.approvalState === "rejected" ||
-      latestAssistantMessage?.approvalState === "approved"
-    ) {
+        latestAssistantMessage?.content &&
+        latestAssistantMessage.id &&
+        !latestAssistantMessage?.approvalState
+      ) {
+        const messageId = latestAssistantMessage.id; // Get the message ID
+        logger.log(
+          `Found latest assistant message (ID: ${messageId}), parsing content...`
+        );
+        const messageContent = latestAssistantMessage.content;
+        const proposalTitle = getDyadChatSummaryTag(messageContent);
+        const proposalWriteFiles = getDyadWriteTags(messageContent);
+        const proposalRenameFiles = getDyadRenameTags(messageContent);
+        const proposalDeleteFiles = getDyadDeleteTags(messageContent);
+        const proposalExecuteSqlQueries = getDyadExecuteSqlTags(messageContent);
+        const packagesAdded = getDyadAddDependencyTags(messageContent);
+        const filesChanged = [
+          ...proposalWriteFiles.map((tag) => ({
+            name: path.basename(tag.path),
+            path: tag.path,
+            summary: tag.description ?? "(no change summary found)", // Generic summary
+            type: "write" as const,
+            isServerFunction: isServerFunction(tag.path),
+          })),
+          ...proposalRenameFiles.map((tag) => ({
+            name: path.basename(tag.to),
+            path: tag.to,
+            summary: `Rename from ${tag.from} to ${tag.to}`,
+            type: "rename" as const,
+            isServerFunction: isServerFunction(tag.to),
+          })),
+          ...proposalDeleteFiles.map((tag) => ({
+            name: path.basename(tag),
+            path: tag,
+            summary: `Delete file`,
+            type: "delete" as const,
+            isServerFunction: isServerFunction(tag),
+          })),
+        ];
+        // Check if we have enough information to create a proposal
+        if (
+          filesChanged.length > 0 ||
+          packagesAdded.length > 0 ||
+          proposalExecuteSqlQueries.length > 0
+        ) {
+          const proposal: CodeProposal = {
+            type: "code-proposal",
+            // Use parsed title or a default title if summary tag is missing but write tags exist
+            title: proposalTitle ?? "Proposed File Changes",
+            securityRisks: [], // Keep empty
+            filesChanged,
+            packagesAdded,
+            sqlQueries: proposalExecuteSqlQueries.map((query) => ({
+              content: query.content,
+              description: query.description,
+            })),
+          };
+          logger.log(
+            "Generated code proposal. title=",
+            proposal.title,
+            "files=",
+            proposal.filesChanged.length,
+            "packages=",
+            proposal.packagesAdded.length
+          );
+          return {
+            proposal: proposal,
+            chatId,
+            messageId,
+          };
+        } else {
+          logger.log(
+            "No relevant tags found in the latest assistant message content."
+          );
+        }
+      }
       // Get all chat messages to calculate token usage
       const chat = await db.query.chats.findFirst({
         where: eq(chats.id, chatId),
         with: {
+          app: true,
           messages: {
             orderBy: (messages, { asc }) => [asc(messages.createdAt)],
           },
         },
       });
-      if (chat) {
+      if (latestAssistantMessage && chat) {
         // Calculate total tokens from message history
-        const totalTokens = estimateMessagesTokens(chat.messages);
+        const messagesTokenCount = estimateMessagesTokens(chat.messages);
+        // Use cached token count or calculate new one
+        const codebaseTokenCount = await getCodebaseTokenCount(
+          chatId,
+          latestAssistantMessage.id,
+          latestAssistantMessage.content || "",
+          chat.app.path
+        );
+        const totalTokens = messagesTokenCount + codebaseTokenCount;
         const contextWindow = Math.min(getContextWindow(), 100_000);
         logger.log(
           `Token usage: ${totalTokens}/${contextWindow} (${
@@ -102,92 +271,11 @@ const getProposalHandler = async (
         }
       }
       return null;
-    if (latestAssistantMessage?.content && latestAssistantMessage.id) {
-      const messageId = latestAssistantMessage.id; // Get the message ID
-      logger.log(
-        `Found latest assistant message (ID: ${messageId}), parsing content...`
-      );
-      const messageContent = latestAssistantMessage.content;
-      const proposalTitle = getDyadChatSummaryTag(messageContent);
-      const proposalWriteFiles = getDyadWriteTags(messageContent);
-      const proposalRenameFiles = getDyadRenameTags(messageContent);
-      const proposalDeleteFiles = getDyadDeleteTags(messageContent);
-      const proposalExecuteSqlQueries = getDyadExecuteSqlTags(messageContent);
-      const packagesAdded = getDyadAddDependencyTags(messageContent);
-      const filesChanged = [
-        ...proposalWriteFiles.map((tag) => ({
-          name: path.basename(tag.path),
-          path: tag.path,
-          summary: tag.description ?? "(no change summary found)", // Generic summary
-          type: "write" as const,
-          isServerFunction: isServerFunction(tag.path),
-        })),
-        ...proposalRenameFiles.map((tag) => ({
-          name: path.basename(tag.to),
-          path: tag.to,
-          summary: `Rename from ${tag.from} to ${tag.to}`,
-          type: "rename" as const,
-          isServerFunction: isServerFunction(tag.to),
-        })),
-        ...proposalDeleteFiles.map((tag) => ({
-          name: path.basename(tag),
-          path: tag,
-          summary: `Delete file`,
-          type: "delete" as const,
-          isServerFunction: isServerFunction(tag),
-        })),
-      ];
-      // Check if we have enough information to create a proposal
-      if (
-        filesChanged.length > 0 ||
-        packagesAdded.length > 0 ||
-        proposalExecuteSqlQueries.length > 0
-      ) {
-        const proposal: CodeProposal = {
-          type: "code-proposal",
-          // Use parsed title or a default title if summary tag is missing but write tags exist
-          title: proposalTitle ?? "Proposed File Changes",
-          securityRisks: [], // Keep empty
-          filesChanged,
-          packagesAdded,
-          sqlQueries: proposalExecuteSqlQueries.map((query) => ({
-            content: query.content,
-            description: query.description,
-          })),
-        };
-        logger.log(
-          "Generated code proposal. title=",
-          proposal.title,
-          "files=",
-          proposal.filesChanged.length,
-          "packages=",
-          proposal.packagesAdded.length
-        );
-        return {
-          proposal: proposal,
-          chatId,
-          messageId,
-        };
-      } else {
-        logger.log(
-          "No relevant tags found in the latest assistant message content."
-        );
-        return null; // No proposal could be generated
-      }
-    } else {
-      logger.log(`No assistant message found for chatId: ${chatId}`);
-      return null; // No message found
-    }
-  } catch (error) {
-    logger.error(`Error processing proposal for chatId ${chatId}:`, error);
-    return null; // Indicate DB or processing error
-  }
+    } catch (error) {
+      logger.error(`Error processing proposal for chatId ${chatId}:`, error);
+      return null; // Indicate DB or processing error
+    }
+  });
 };
 // Handler to approve a proposal (process actions and update message)

View File

@@ -1,6 +1,10 @@
 import fs from "node:fs";
+import fsAsync from "node:fs/promises";
 import path from "node:path";
 import { isIgnored } from "isomorphic-git";
+import log from "electron-log";
+const logger = log.scope("utils/codebase");
 // File extensions to include in the extraction
 const ALLOWED_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".css", ".html"];
@@ -14,6 +18,23 @@ const ALWAYS_INCLUDE_FILES = ["package.json"];
 // Maximum file size to include (in bytes) - 100KB
 const MAX_FILE_SIZE = 100 * 1024;
+// Maximum size for fileContentCache
+const MAX_FILE_CACHE_SIZE = 500;
+// File content cache with timestamps
+type FileCache = {
+  content: string;
+  mtime: number;
+};
+// Cache for file contents
+const fileContentCache = new Map<string, FileCache>();
+// Cache for git ignored paths
+const gitIgnoreCache = new Map<string, boolean>();
+// Map to store .gitignore file paths and their modification times
+const gitIgnoreMtimes = new Map<string, number>();
 /**
  * Check if a path should be ignored based on git ignore rules
  */
@@ -22,14 +43,108 @@ async function isGitIgnored(
   baseDir: string
 ): Promise<boolean> {
   try {
+    // Check if any relevant .gitignore has been modified
+    // Git checks .gitignore files in the path from the repo root to the file
+    let currentDir = baseDir;
+    const pathParts = path.relative(baseDir, filePath).split(path.sep);
+    let shouldClearCache = false;
+    // Check root .gitignore
+    const rootGitIgnorePath = path.join(baseDir, ".gitignore");
+    try {
+      const stats = await fsAsync.stat(rootGitIgnorePath);
+      const lastMtime = gitIgnoreMtimes.get(rootGitIgnorePath) || 0;
+      if (stats.mtimeMs > lastMtime) {
+        gitIgnoreMtimes.set(rootGitIgnorePath, stats.mtimeMs);
+        shouldClearCache = true;
+      }
+    } catch (error) {
+      // Root .gitignore might not exist, which is fine
+    }
+    // Check .gitignore files in parent directories
+    for (let i = 0; i < pathParts.length - 1; i++) {
+      currentDir = path.join(currentDir, pathParts[i]);
+      const gitIgnorePath = path.join(currentDir, ".gitignore");
+      try {
+        const stats = await fsAsync.stat(gitIgnorePath);
+        const lastMtime = gitIgnoreMtimes.get(gitIgnorePath) || 0;
+        if (stats.mtimeMs > lastMtime) {
+          gitIgnoreMtimes.set(gitIgnorePath, stats.mtimeMs);
+          shouldClearCache = true;
+        }
+      } catch (error) {
+        // This directory might not have a .gitignore, which is fine
+      }
+    }
+    // Clear cache if any .gitignore was modified
+    if (shouldClearCache) {
+      gitIgnoreCache.clear();
+    }
+    const cacheKey = `${baseDir}:${filePath}`;
+    if (gitIgnoreCache.has(cacheKey)) {
+      return gitIgnoreCache.get(cacheKey)!;
+    }
     const relativePath = path.relative(baseDir, filePath);
-    return await isIgnored({ fs, dir: baseDir, filepath: relativePath });
+    const result = await isIgnored({
+      fs,
+      dir: baseDir,
+      filepath: relativePath,
+    });
+    gitIgnoreCache.set(cacheKey, result);
+    return result;
   } catch (error) {
-    console.error(`Error checking if path is git ignored: ${filePath}`, error);
+    logger.error(`Error checking if path is git ignored: ${filePath}`, error);
     return false;
   }
 }
+/**
+ * Read file contents with caching based on last modified time
+ */
+async function readFileWithCache(filePath: string): Promise<string | null> {
+  try {
+    // Get file stats to check the modification time
+    const stats = await fsAsync.stat(filePath);
+    const currentMtime = stats.mtimeMs;
+    // If file is in cache and hasn't been modified, use cached content
+    if (fileContentCache.has(filePath)) {
+      const cache = fileContentCache.get(filePath)!;
+      if (cache.mtime === currentMtime) {
+        return cache.content;
+      }
+    }
+    // Read file and update cache
+    const content = await fsAsync.readFile(filePath, "utf-8");
+    fileContentCache.set(filePath, { content, mtime: currentMtime });
+    // Manage cache size by clearing oldest entries when it gets too large
+    if (fileContentCache.size > MAX_FILE_CACHE_SIZE) {
+      // Get the oldest 25% of entries to remove
+      const entriesToDelete = Math.ceil(MAX_FILE_CACHE_SIZE * 0.25);
+      const keys = Array.from(fileContentCache.keys());
+      // Remove oldest entries (first in, first out)
+      for (let i = 0; i < entriesToDelete; i++) {
+        fileContentCache.delete(keys[i]);
+      }
+    }
+    return content;
+  } catch (error) {
+    logger.error(`Error reading file: ${filePath}`, error);
+    return null;
+  }
+}
 /**
  * Recursively walk a directory and collect all relevant files
  */
@@ -37,25 +152,29 @@ async function collectFiles(dir: string, baseDir: string): Promise<string[]> {
   const files: string[] = [];
   // Check if directory exists
-  if (!fs.existsSync(dir)) {
+  try {
+    await fsAsync.access(dir);
+  } catch {
+    // Directory doesn't exist or is not accessible
     return files;
   }
   try {
     // Read directory contents
-    const entries = fs.readdirSync(dir, { withFileTypes: true });
-    for (const entry of entries) {
+    const entries = await fsAsync.readdir(dir, { withFileTypes: true });
+    // Process entries concurrently
+    const promises = entries.map(async (entry) => {
       const fullPath = path.join(dir, entry.name);
       // Skip excluded directories
       if (entry.isDirectory() && EXCLUDED_DIRS.includes(entry.name)) {
-        continue;
+        return;
       }
       // Skip if the entry is git ignored
       if (await isGitIgnored(fullPath, baseDir)) {
-        continue;
+        return;
       }
       if (entry.isDirectory()) {
@@ -69,22 +188,24 @@ async function collectFiles(dir: string, baseDir: string): Promise<string[]> {
         // Skip files that are too large
         try {
-          const stats = fs.statSync(fullPath);
+          const stats = await fsAsync.stat(fullPath);
           if (stats.size > MAX_FILE_SIZE) {
-            continue;
+            return;
           }
         } catch (error) {
-          console.error(`Error checking file size: ${fullPath}`, error);
-          continue;
+          logger.error(`Error checking file size: ${fullPath}`, error);
+          return;
         }
         if (ALLOWED_EXTENSIONS.includes(ext) || shouldAlwaysInclude) {
           files.push(fullPath);
         }
       }
-    }
+    });
+    await Promise.all(promises);
   } catch (error) {
-    console.error(`Error reading directory ${dir}:`, error);
+    logger.error(`Error reading directory ${dir}:`, error);
   }
   return files;
@@ -93,7 +214,7 @@ async function collectFiles(dir: string, baseDir: string): Promise<string[]> {
 /**
  * Format a file for inclusion in the codebase extract
  */
-function formatFile(filePath: string, baseDir: string): string {
+async function formatFile(filePath: string, baseDir: string): Promise<string> {
   try {
     const relativePath = path.relative(baseDir, filePath);
@@ -114,7 +235,15 @@ function formatFile(filePath: string, baseDir: string): string {
 `;
     }
-    const content = fs.readFileSync(filePath, "utf-8");
+    const content = await readFileWithCache(filePath);
+    if (content === null) {
+      return `<dyad-file path="${relativePath}">
+// Error reading file
+</dyad-file>
+`;
+    }
     return `<dyad-file path="${relativePath}">
 ${content}
@@ -122,7 +251,7 @@ ${content}
 `;
   } catch (error) {
-    console.error(`Error reading file: ${filePath}`, error);
+    logger.error(`Error reading file: ${filePath}`, error);
     return `<dyad-file path="${path.relative(baseDir, filePath)}">
 // Error reading file: ${error}
 </dyad-file>
@@ -137,26 +266,53 @@ ${content}
  * @returns A string containing formatted file contents
  */
 export async function extractCodebase(appPath: string): Promise<string> {
-  if (!fs.existsSync(appPath)) {
-    return `# Error: Directory ${appPath} does not exist`;
+  try {
+    await fsAsync.access(appPath);
+  } catch {
+    return `# Error: Directory ${appPath} does not exist or is not accessible`;
   }
+  const startTime = Date.now();
   // Collect all relevant files
   const files = await collectFiles(appPath, appPath);
-  // Sort files to prioritize important files
-  const sortedFiles = sortFilesByImportance(files, appPath);
+  // Sort files by modification time (oldest first)
+  // This is important for cache-ability.
+  const sortedFiles = await sortFilesByModificationTime(files);
   // Format files
   let output = "";
-  for (const file of sortedFiles) {
-    output += formatFile(file, appPath);
-  }
+  const formatPromises = sortedFiles.map((file) => formatFile(file, appPath));
+  const formattedFiles = await Promise.all(formatPromises);
+  output = formattedFiles.join("");
+  const endTime = Date.now();
+  logger.log("extractCodebase: time taken", endTime - startTime);
   return output;
 }
+/**
+ * Sort files by their modification timestamp (oldest first)
+ */
+async function sortFilesByModificationTime(files: string[]): Promise<string[]> {
+  // Get stats for all files
+  const fileStats = await Promise.all(
+    files.map(async (file) => {
+      try {
+        const stats = await fsAsync.stat(file);
+        return { file, mtime: stats.mtimeMs };
+      } catch (error) {
+        // If there's an error getting stats, use current time as fallback
+        logger.error(`Error getting file stats for ${file}:`, error);
+        return { file, mtime: Date.now() };
+      }
+    })
+  );
+  // Sort by modification time (oldest first)
+  return fileStats.sort((a, b) => a.mtime - b.mtime).map((item) => item.file);
+}
 /**
  * Sort files by their importance for context
  */