From 7d5595f63011e7c05ef59a32972b40a7c492e8ad Mon Sep 17 00:00:00 2001 From: Will Chen Date: Mon, 28 Apr 2025 21:39:16 -0700 Subject: [PATCH] Optimize codebase extract & fix proposal handler token counting logic (#36) * remove deprecated script * Optimize codebase extract & fix proposal handler token counting logic * add caching + lock * sort by modified timestamp * cache cleanup --- scripts/extract-codebase.ts | 116 ---------- src/ipc/handlers/proposal_handlers.ts | 298 +++++++++++++++++--------- src/utils/codebase.ts | 204 +++++++++++++++--- 3 files changed, 373 insertions(+), 245 deletions(-) delete mode 100644 scripts/extract-codebase.ts diff --git a/scripts/extract-codebase.ts b/scripts/extract-codebase.ts deleted file mode 100644 index b641ed0..0000000 --- a/scripts/extract-codebase.ts +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env node - -// Add type module declaration at the top -// @ts-check -// @ts-ignore -// eslint-disable-next-line -// @ts-nocheck - -import fs from "fs"; -import path from "path"; -import { fileURLToPath } from "url"; -import { dirname } from "path"; -import { isIgnored } from "isomorphic-git"; -import log from "electron-log"; - -const logger = log.scope("extract-codebase"); - -// File extensions to include -const ALLOWED_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".css"]; - -// Function to check if a path is ignored by gitignore -async function isGitIgnored( - filePath: string, - baseDir: string -): Promise { - try { - const relativePath = path.relative(baseDir, filePath); - return await isIgnored({ fs, dir: baseDir, filepath: relativePath }); - } catch (error) { - logger.error(`Error checking if path is git ignored: ${filePath}`, error); - return false; - } -} - -// Function to recursively walk a directory -async function walkDirectory(dir: string, baseDir: string): Promise { - const files: string[] = []; - - // Read directory contents - const entries = fs.readdirSync(dir, { withFileTypes: true }); - - for (const entry of entries) { - const fullPath = path.join(dir, entry.name); - - // Skip if the entry is git ignored - if (await isGitIgnored(fullPath, baseDir)) { - continue; - } - - if (entry.isDirectory()) { - // Recursively process subdirectories - const subDirFiles = await walkDirectory(fullPath, baseDir); - files.push(...subDirFiles); - } else if (entry.isFile()) { - // Check file extension - const ext = path.extname(entry.name).toLowerCase(); - if (ALLOWED_EXTENSIONS.includes(ext)) { - files.push(fullPath); - } - } - } - - return files; -} - -// Function to read file contents and format for LLM consumption -function formatFile(filePath: string, baseDir: string): string { - try { - const relativePath = path.relative(baseDir, filePath); - - // Check if the file is in components/ui directory - if ( - relativePath.includes("eslint.config") || - relativePath.includes("components/ui") || - relativePath.includes("components\\ui") - ) { - return `## File: ${relativePath}\n\n`; - } - - const content = fs.readFileSync(filePath, "utf-8"); - - return `## File: ${relativePath}\n\`\`\`${path - .extname(filePath) - .substring(1)}\n${content}\n\`\`\`\n\n`; - } catch (error) { - logger.error(`Error reading file: ${filePath}`, error); - return `## File: ${filePath}\nError reading file: ${error}\n\n`; - } -} - -async function main() { - // Get command line arguments - const args = process.argv.slice(2); - const scaffoldDir = args[0] || process.cwd(); - const outputFile = args[1] || "codebase-extract.md"; - - console.log(`Extracting code from: ${scaffoldDir}`); - console.log(`Output will be written to: ${outputFile}`); - - // Walk directory and get all files - const files = await walkDirectory(scaffoldDir, scaffoldDir); - console.log(`Found ${files.length} code files`); - - // Format files - let output = `# Codebase Extract\nGenerated on: ${new Date().toISOString()}\nTotal files: ${ - files.length - }\n\n`; - - for (const file of files) { - output += formatFile(file, scaffoldDir); - } - - // Write to output file - fs.writeFileSync(outputFile, output); - console.log(`Extraction complete. Output written to ${outputFile}`); -} diff --git a/src/ipc/handlers/proposal_handlers.ts b/src/ipc/handlers/proposal_handlers.ts index f5e5432..c1752dc 100644 --- a/src/ipc/handlers/proposal_handlers.ts +++ b/src/ipc/handlers/proposal_handlers.ts @@ -22,8 +22,14 @@ import { } from "../processors/response_processor"; import log from "electron-log"; import { isServerFunction } from "../../supabase_admin/supabase_utils"; -import { estimateMessagesTokens, getContextWindow } from "../utils/token_utils"; - +import { + estimateMessagesTokens, + estimateTokens, + getContextWindow, +} from "../utils/token_utils"; +import { extractCodebase } from "../../utils/codebase"; +import { getDyadAppPath } from "../../paths/paths"; +import { withLock } from "../utils/lock_utils"; const logger = log.scope("proposal_handlers"); // Placeholder Proposal data (can be removed or kept for reference) @@ -44,41 +50,204 @@ function isParsedProposal(obj: any): obj is ParsedProposal { ); } +// Cache for codebase token counts +interface CodebaseTokenCache { + chatId: number; + messageId: number; + messageContent: string; + tokenCount: number; + timestamp: number; +} + +// Cache expiration time (5 minutes) +const CACHE_EXPIRATION_MS = 5 * 60 * 1000; + +// In-memory cache for codebase token counts +const codebaseTokenCache = new Map(); + +// Function to clean up expired cache entries +function cleanupExpiredCacheEntries() { + const now = Date.now(); + let expiredCount = 0; + + codebaseTokenCache.forEach((entry, key) => { + if (now - entry.timestamp > CACHE_EXPIRATION_MS) { + codebaseTokenCache.delete(key); + expiredCount++; + } + }); + + if (expiredCount > 0) { + logger.log( + `Cleaned up ${expiredCount} expired codebase token cache entries` + ); + } +} + +// Function to get cached token count or calculate and cache it +async function getCodebaseTokenCount( + chatId: number, + messageId: number, + messageContent: string, + appPath: string +): Promise { + // Clean up expired cache entries first + cleanupExpiredCacheEntries(); + + const cacheEntry = codebaseTokenCache.get(chatId); + const now = Date.now(); + + // Check if cache is valid - same chat, message and content, and not expired + if ( + cacheEntry && + cacheEntry.messageId === messageId && + cacheEntry.messageContent === messageContent && + now - cacheEntry.timestamp < CACHE_EXPIRATION_MS + ) { + logger.log(`Using cached codebase token count for chatId: ${chatId}`); + return cacheEntry.tokenCount; + } + + // Calculate and cache the token count + logger.log(`Calculating codebase token count for chatId: ${chatId}`); + const codebase = await extractCodebase(getDyadAppPath(appPath)); + const tokenCount = estimateTokens(codebase); + + // Store in cache + codebaseTokenCache.set(chatId, { + chatId, + messageId, + messageContent, + tokenCount, + timestamp: now, + }); + + return tokenCount; +} + const getProposalHandler = async ( _event: IpcMainInvokeEvent, { chatId }: { chatId: number } ): Promise => { - logger.log(`IPC: get-proposal called for chatId: ${chatId}`); + return withLock("get-proposal:" + chatId, async () => { + logger.log(`IPC: get-proposal called for chatId: ${chatId}`); - try { - // Find the latest ASSISTANT message for the chat - const latestAssistantMessage = await db.query.messages.findFirst({ - where: and(eq(messages.chatId, chatId), eq(messages.role, "assistant")), - orderBy: [desc(messages.createdAt)], - columns: { - id: true, // Fetch the ID - content: true, // Fetch the content to parse - approvalState: true, - }, - }); + try { + // Find the latest ASSISTANT message for the chat + const latestAssistantMessage = await db.query.messages.findFirst({ + where: and(eq(messages.chatId, chatId), eq(messages.role, "assistant")), + orderBy: [desc(messages.createdAt)], + columns: { + id: true, // Fetch the ID + content: true, // Fetch the content to parse + approvalState: true, + }, + }); - if ( - latestAssistantMessage?.approvalState === "rejected" || - latestAssistantMessage?.approvalState === "approved" - ) { + if ( + latestAssistantMessage?.content && + latestAssistantMessage.id && + !latestAssistantMessage?.approvalState + ) { + const messageId = latestAssistantMessage.id; // Get the message ID + logger.log( + `Found latest assistant message (ID: ${messageId}), parsing content...` + ); + const messageContent = latestAssistantMessage.content; + + const proposalTitle = getDyadChatSummaryTag(messageContent); + + const proposalWriteFiles = getDyadWriteTags(messageContent); + const proposalRenameFiles = getDyadRenameTags(messageContent); + const proposalDeleteFiles = getDyadDeleteTags(messageContent); + const proposalExecuteSqlQueries = getDyadExecuteSqlTags(messageContent); + const packagesAdded = getDyadAddDependencyTags(messageContent); + + const filesChanged = [ + ...proposalWriteFiles.map((tag) => ({ + name: path.basename(tag.path), + path: tag.path, + summary: tag.description ?? "(no change summary found)", // Generic summary + type: "write" as const, + isServerFunction: isServerFunction(tag.path), + })), + ...proposalRenameFiles.map((tag) => ({ + name: path.basename(tag.to), + path: tag.to, + summary: `Rename from ${tag.from} to ${tag.to}`, + type: "rename" as const, + isServerFunction: isServerFunction(tag.to), + })), + ...proposalDeleteFiles.map((tag) => ({ + name: path.basename(tag), + path: tag, + summary: `Delete file`, + type: "delete" as const, + isServerFunction: isServerFunction(tag), + })), + ]; + // Check if we have enough information to create a proposal + if ( + filesChanged.length > 0 || + packagesAdded.length > 0 || + proposalExecuteSqlQueries.length > 0 + ) { + const proposal: CodeProposal = { + type: "code-proposal", + // Use parsed title or a default title if summary tag is missing but write tags exist + title: proposalTitle ?? "Proposed File Changes", + securityRisks: [], // Keep empty + filesChanged, + packagesAdded, + sqlQueries: proposalExecuteSqlQueries.map((query) => ({ + content: query.content, + description: query.description, + })), + }; + logger.log( + "Generated code proposal. title=", + proposal.title, + "files=", + proposal.filesChanged.length, + "packages=", + proposal.packagesAdded.length + ); + + return { + proposal: proposal, + chatId, + messageId, + }; + } else { + logger.log( + "No relevant tags found in the latest assistant message content." + ); + } + } // Get all chat messages to calculate token usage const chat = await db.query.chats.findFirst({ where: eq(chats.id, chatId), with: { + app: true, messages: { orderBy: (messages, { asc }) => [asc(messages.createdAt)], }, }, }); - if (chat) { + if (latestAssistantMessage && chat) { // Calculate total tokens from message history - const totalTokens = estimateMessagesTokens(chat.messages); + const messagesTokenCount = estimateMessagesTokens(chat.messages); + + // Use cached token count or calculate new one + const codebaseTokenCount = await getCodebaseTokenCount( + chatId, + latestAssistantMessage.id, + latestAssistantMessage.content || "", + chat.app.path + ); + + const totalTokens = messagesTokenCount + codebaseTokenCount; const contextWindow = Math.min(getContextWindow(), 100_000); logger.log( `Token usage: ${totalTokens}/${contextWindow} (${ @@ -102,92 +271,11 @@ const getProposalHandler = async ( } } return null; + } catch (error) { + logger.error(`Error processing proposal for chatId ${chatId}:`, error); + return null; // Indicate DB or processing error } - - if (latestAssistantMessage?.content && latestAssistantMessage.id) { - const messageId = latestAssistantMessage.id; // Get the message ID - logger.log( - `Found latest assistant message (ID: ${messageId}), parsing content...` - ); - const messageContent = latestAssistantMessage.content; - - const proposalTitle = getDyadChatSummaryTag(messageContent); - - const proposalWriteFiles = getDyadWriteTags(messageContent); - const proposalRenameFiles = getDyadRenameTags(messageContent); - const proposalDeleteFiles = getDyadDeleteTags(messageContent); - const proposalExecuteSqlQueries = getDyadExecuteSqlTags(messageContent); - const packagesAdded = getDyadAddDependencyTags(messageContent); - - const filesChanged = [ - ...proposalWriteFiles.map((tag) => ({ - name: path.basename(tag.path), - path: tag.path, - summary: tag.description ?? "(no change summary found)", // Generic summary - type: "write" as const, - isServerFunction: isServerFunction(tag.path), - })), - ...proposalRenameFiles.map((tag) => ({ - name: path.basename(tag.to), - path: tag.to, - summary: `Rename from ${tag.from} to ${tag.to}`, - type: "rename" as const, - isServerFunction: isServerFunction(tag.to), - })), - ...proposalDeleteFiles.map((tag) => ({ - name: path.basename(tag), - path: tag, - summary: `Delete file`, - type: "delete" as const, - isServerFunction: isServerFunction(tag), - })), - ]; - // Check if we have enough information to create a proposal - if ( - filesChanged.length > 0 || - packagesAdded.length > 0 || - proposalExecuteSqlQueries.length > 0 - ) { - const proposal: CodeProposal = { - type: "code-proposal", - // Use parsed title or a default title if summary tag is missing but write tags exist - title: proposalTitle ?? "Proposed File Changes", - securityRisks: [], // Keep empty - filesChanged, - packagesAdded, - sqlQueries: proposalExecuteSqlQueries.map((query) => ({ - content: query.content, - description: query.description, - })), - }; - logger.log( - "Generated code proposal. title=", - proposal.title, - "files=", - proposal.filesChanged.length, - "packages=", - proposal.packagesAdded.length - ); - - return { - proposal: proposal, - chatId, - messageId, - }; - } else { - logger.log( - "No relevant tags found in the latest assistant message content." - ); - return null; // No proposal could be generated - } - } else { - logger.log(`No assistant message found for chatId: ${chatId}`); - return null; // No message found - } - } catch (error) { - logger.error(`Error processing proposal for chatId ${chatId}:`, error); - return null; // Indicate DB or processing error - } + }); }; // Handler to approve a proposal (process actions and update message) diff --git a/src/utils/codebase.ts b/src/utils/codebase.ts index fcf6441..29ea48c 100644 --- a/src/utils/codebase.ts +++ b/src/utils/codebase.ts @@ -1,6 +1,10 @@ import fs from "node:fs"; +import fsAsync from "node:fs/promises"; import path from "node:path"; import { isIgnored } from "isomorphic-git"; +import log from "electron-log"; + +const logger = log.scope("utils/codebase"); // File extensions to include in the extraction const ALLOWED_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".css", ".html"]; @@ -14,6 +18,23 @@ const ALWAYS_INCLUDE_FILES = ["package.json"]; // Maximum file size to include (in bytes) - 100KB const MAX_FILE_SIZE = 100 * 1024; +// Maximum size for fileContentCache +const MAX_FILE_CACHE_SIZE = 500; + +// File content cache with timestamps +type FileCache = { + content: string; + mtime: number; +}; + +// Cache for file contents +const fileContentCache = new Map(); + +// Cache for git ignored paths +const gitIgnoreCache = new Map(); +// Map to store .gitignore file paths and their modification times +const gitIgnoreMtimes = new Map(); + /** * Check if a path should be ignored based on git ignore rules */ @@ -22,14 +43,108 @@ async function isGitIgnored( baseDir: string ): Promise { try { + // Check if any relevant .gitignore has been modified + // Git checks .gitignore files in the path from the repo root to the file + let currentDir = baseDir; + const pathParts = path.relative(baseDir, filePath).split(path.sep); + let shouldClearCache = false; + + // Check root .gitignore + const rootGitIgnorePath = path.join(baseDir, ".gitignore"); + try { + const stats = await fsAsync.stat(rootGitIgnorePath); + const lastMtime = gitIgnoreMtimes.get(rootGitIgnorePath) || 0; + if (stats.mtimeMs > lastMtime) { + gitIgnoreMtimes.set(rootGitIgnorePath, stats.mtimeMs); + shouldClearCache = true; + } + } catch (error) { + // Root .gitignore might not exist, which is fine + } + + // Check .gitignore files in parent directories + for (let i = 0; i < pathParts.length - 1; i++) { + currentDir = path.join(currentDir, pathParts[i]); + const gitIgnorePath = path.join(currentDir, ".gitignore"); + + try { + const stats = await fsAsync.stat(gitIgnorePath); + const lastMtime = gitIgnoreMtimes.get(gitIgnorePath) || 0; + if (stats.mtimeMs > lastMtime) { + gitIgnoreMtimes.set(gitIgnorePath, stats.mtimeMs); + shouldClearCache = true; + } + } catch (error) { + // This directory might not have a .gitignore, which is fine + } + } + + // Clear cache if any .gitignore was modified + if (shouldClearCache) { + gitIgnoreCache.clear(); + } + + const cacheKey = `${baseDir}:${filePath}`; + + if (gitIgnoreCache.has(cacheKey)) { + return gitIgnoreCache.get(cacheKey)!; + } + const relativePath = path.relative(baseDir, filePath); - return await isIgnored({ fs, dir: baseDir, filepath: relativePath }); + const result = await isIgnored({ + fs, + dir: baseDir, + filepath: relativePath, + }); + + gitIgnoreCache.set(cacheKey, result); + return result; } catch (error) { - console.error(`Error checking if path is git ignored: ${filePath}`, error); + logger.error(`Error checking if path is git ignored: ${filePath}`, error); return false; } } +/** + * Read file contents with caching based on last modified time + */ +async function readFileWithCache(filePath: string): Promise { + try { + // Get file stats to check the modification time + const stats = await fsAsync.stat(filePath); + const currentMtime = stats.mtimeMs; + + // If file is in cache and hasn't been modified, use cached content + if (fileContentCache.has(filePath)) { + const cache = fileContentCache.get(filePath)!; + if (cache.mtime === currentMtime) { + return cache.content; + } + } + + // Read file and update cache + const content = await fsAsync.readFile(filePath, "utf-8"); + fileContentCache.set(filePath, { content, mtime: currentMtime }); + + // Manage cache size by clearing oldest entries when it gets too large + if (fileContentCache.size > MAX_FILE_CACHE_SIZE) { + // Get the oldest 25% of entries to remove + const entriesToDelete = Math.ceil(MAX_FILE_CACHE_SIZE * 0.25); + const keys = Array.from(fileContentCache.keys()); + + // Remove oldest entries (first in, first out) + for (let i = 0; i < entriesToDelete; i++) { + fileContentCache.delete(keys[i]); + } + } + + return content; + } catch (error) { + logger.error(`Error reading file: ${filePath}`, error); + return null; + } +} + /** * Recursively walk a directory and collect all relevant files */ @@ -37,25 +152,29 @@ async function collectFiles(dir: string, baseDir: string): Promise { const files: string[] = []; // Check if directory exists - if (!fs.existsSync(dir)) { + try { + await fsAsync.access(dir); + } catch { + // Directory doesn't exist or is not accessible return files; } try { // Read directory contents - const entries = fs.readdirSync(dir, { withFileTypes: true }); + const entries = await fsAsync.readdir(dir, { withFileTypes: true }); - for (const entry of entries) { + // Process entries concurrently + const promises = entries.map(async (entry) => { const fullPath = path.join(dir, entry.name); // Skip excluded directories if (entry.isDirectory() && EXCLUDED_DIRS.includes(entry.name)) { - continue; + return; } // Skip if the entry is git ignored if (await isGitIgnored(fullPath, baseDir)) { - continue; + return; } if (entry.isDirectory()) { @@ -69,22 +188,24 @@ async function collectFiles(dir: string, baseDir: string): Promise { // Skip files that are too large try { - const stats = fs.statSync(fullPath); + const stats = await fsAsync.stat(fullPath); if (stats.size > MAX_FILE_SIZE) { - continue; + return; } } catch (error) { - console.error(`Error checking file size: ${fullPath}`, error); - continue; + logger.error(`Error checking file size: ${fullPath}`, error); + return; } if (ALLOWED_EXTENSIONS.includes(ext) || shouldAlwaysInclude) { files.push(fullPath); } } - } + }); + + await Promise.all(promises); } catch (error) { - console.error(`Error reading directory ${dir}:`, error); + logger.error(`Error reading directory ${dir}:`, error); } return files; @@ -93,7 +214,7 @@ async function collectFiles(dir: string, baseDir: string): Promise { /** * Format a file for inclusion in the codebase extract */ -function formatFile(filePath: string, baseDir: string): string { +async function formatFile(filePath: string, baseDir: string): Promise { try { const relativePath = path.relative(baseDir, filePath); @@ -114,7 +235,15 @@ function formatFile(filePath: string, baseDir: string): string { `; } - const content = fs.readFileSync(filePath, "utf-8"); + const content = await readFileWithCache(filePath); + + if (content === null) { + return ` +// Error reading file + + +`; + } return ` ${content} @@ -122,7 +251,7 @@ ${content} `; } catch (error) { - console.error(`Error reading file: ${filePath}`, error); + logger.error(`Error reading file: ${filePath}`, error); return ` // Error reading file: ${error} @@ -137,26 +266,53 @@ ${content} * @returns A string containing formatted file contents */ export async function extractCodebase(appPath: string): Promise { - if (!fs.existsSync(appPath)) { - return `# Error: Directory ${appPath} does not exist`; + try { + await fsAsync.access(appPath); + } catch { + return `# Error: Directory ${appPath} does not exist or is not accessible`; } + const startTime = Date.now(); // Collect all relevant files const files = await collectFiles(appPath, appPath); - // Sort files to prioritize important files - const sortedFiles = sortFilesByImportance(files, appPath); + // Sort files by modification time (oldest first) + // This is important for cache-ability. + const sortedFiles = await sortFilesByModificationTime(files); // Format files let output = ""; + const formatPromises = sortedFiles.map((file) => formatFile(file, appPath)); + const formattedFiles = await Promise.all(formatPromises); + output = formattedFiles.join(""); - for (const file of sortedFiles) { - output += formatFile(file, appPath); - } - + const endTime = Date.now(); + logger.log("extractCodebase: time taken", endTime - startTime); return output; } +/** + * Sort files by their modification timestamp (oldest first) + */ +async function sortFilesByModificationTime(files: string[]): Promise { + // Get stats for all files + const fileStats = await Promise.all( + files.map(async (file) => { + try { + const stats = await fsAsync.stat(file); + return { file, mtime: stats.mtimeMs }; + } catch (error) { + // If there's an error getting stats, use current time as fallback + logger.error(`Error getting file stats for ${file}:`, error); + return { file, mtime: Date.now() }; + } + }) + ); + + // Sort by modification time (oldest first) + return fileStats.sort((a, b) => a.mtime - b.mtime).map((item) => item.file); +} + /** * Sort files by their importance for context */