Optimize codebase extract & fix proposal handler token counting logic (#36)
* remove deprecated script
* Optimize codebase extract & fix proposal handler token counting logic
* add caching + lock
* sort by modified timestamp
* cache cleanup
This commit is contained in:
@@ -1,116 +0,0 @@
|
|||||||
#!/usr/bin/env node
|
|
||||||
|
|
||||||
// Add type module declaration at the top
|
|
||||||
// @ts-check
|
|
||||||
// @ts-ignore
|
|
||||||
// eslint-disable-next-line
|
|
||||||
// @ts-nocheck
|
|
||||||
|
|
||||||
import fs from "fs";
|
|
||||||
import path from "path";
|
|
||||||
import { fileURLToPath } from "url";
|
|
||||||
import { dirname } from "path";
|
|
||||||
import { isIgnored } from "isomorphic-git";
|
|
||||||
import log from "electron-log";
|
|
||||||
|
|
||||||
const logger = log.scope("extract-codebase");
|
|
||||||
|
|
||||||
// File extensions to include
|
|
||||||
const ALLOWED_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".css"];
|
|
||||||
|
|
||||||
/**
 * Report whether `filePath` is excluded by the git ignore rules of `baseDir`.
 *
 * The path is made relative to `baseDir` before being handed to
 * isomorphic-git's `isIgnored`. Any failure is logged and treated as
 * "not ignored".
 *
 * @param filePath Path of the entry to test.
 * @param baseDir Directory passed to `isIgnored` as `dir`.
 * @returns `true` when ignored; `false` otherwise or when the check fails.
 */
async function isGitIgnored(
  filePath: string,
  baseDir: string
): Promise<boolean> {
  let ignored = false;
  try {
    ignored = await isIgnored({
      fs,
      dir: baseDir,
      filepath: path.relative(baseDir, filePath),
    });
  } catch (error) {
    logger.error(`Error checking if path is git ignored: ${filePath}`, error);
  }
  return ignored;
}
|
|
||||||
|
|
||||||
/**
 * Recursively collect the files under `dir` whose extension is listed in
 * ALLOWED_EXTENSIONS, skipping every entry that is git-ignored.
 *
 * @param dir Directory currently being scanned.
 * @param baseDir Root directory used for the git-ignore check.
 * @returns Full paths of the matching files, in traversal order.
 */
async function walkDirectory(dir: string, baseDir: string): Promise<string[]> {
  const collected: string[] = [];

  for (const dirent of fs.readdirSync(dir, { withFileTypes: true })) {
    const entryPath = path.join(dir, dirent.name);

    // Ignored entries are dropped before looking at their type, so an
    // ignored directory is never descended into.
    if (await isGitIgnored(entryPath, baseDir)) {
      continue;
    }

    if (dirent.isDirectory()) {
      const nested = await walkDirectory(entryPath, baseDir);
      collected.push(...nested);
      continue;
    }

    if (!dirent.isFile()) {
      continue;
    }

    // Extension match is case-insensitive.
    const extension = path.extname(dirent.name).toLowerCase();
    if (ALLOWED_EXTENSIONS.includes(extension)) {
      collected.push(entryPath);
    }
  }

  return collected;
}
|
|
||||||
|
|
||||||
/**
 * Render one file as a markdown section for LLM consumption.
 *
 * Files whose relative path contains `eslint.config` or `components/ui`
 * (either separator style) are emitted as a header only, without contents.
 * Every other file is emitted as a header followed by a fenced code block
 * tagged with the file extension.
 *
 * @param filePath Path of the file to render.
 * @param baseDir Directory the reported path is made relative to.
 * @returns The markdown section. On a read failure the section carries the
 *   error text instead of the contents; the header still uses the relative
 *   path so every section is labelled consistently.
 */
function formatFile(filePath: string, baseDir: string): string {
  // Computed outside the try block so the error branch can reuse it.
  const relativePath = path.relative(baseDir, filePath);

  const headerOnlyMarkers = ["eslint.config", "components/ui", "components\\ui"];
  if (headerOnlyMarkers.some((marker) => relativePath.includes(marker))) {
    return `## File: ${relativePath}\n\n`;
  }

  try {
    const content = fs.readFileSync(filePath, "utf-8");
    // Extension without the leading dot doubles as the fence language tag.
    const fenceLanguage = path.extname(filePath).substring(1);
    return `## File: ${relativePath}\n\`\`\`${fenceLanguage}\n${content}\n\`\`\`\n\n`;
  } catch (error) {
    logger.error(`Error reading file: ${filePath}`, error);
    return `## File: ${relativePath}\nError reading file: ${error}\n\n`;
  }
}
|
|
||||||
|
|
||||||
/**
 * Command-line entry point: extract the code files under a directory into a
 * single markdown document.
 *
 * argv[2] is the directory to scan (defaults to the current working
 * directory); argv[3] is the output file (defaults to `codebase-extract.md`).
 */
async function main() {
  const [dirArg, outputArg] = process.argv.slice(2);
  const scaffoldDir = dirArg || process.cwd();
  const outputFile = outputArg || "codebase-extract.md";

  console.log(`Extracting code from: ${scaffoldDir}`);
  console.log(`Output will be written to: ${outputFile}`);

  const files = await walkDirectory(scaffoldDir, scaffoldDir);
  console.log(`Found ${files.length} code files`);

  // Header is built before any file is formatted, so the timestamp reflects
  // the start of formatting.
  const header = `# Codebase Extract\nGenerated on: ${new Date().toISOString()}\nTotal files: ${files.length}\n\n`;
  const sections = files.map((file) => formatFile(file, scaffoldDir));

  fs.writeFileSync(outputFile, header + sections.join(""));
  console.log(`Extraction complete. Output written to ${outputFile}`);
}
|
|
||||||
@@ -22,8 +22,14 @@ import {
|
|||||||
} from "../processors/response_processor";
|
} from "../processors/response_processor";
|
||||||
import log from "electron-log";
|
import log from "electron-log";
|
||||||
import { isServerFunction } from "../../supabase_admin/supabase_utils";
|
import { isServerFunction } from "../../supabase_admin/supabase_utils";
|
||||||
import { estimateMessagesTokens, getContextWindow } from "../utils/token_utils";
|
import {
|
||||||
|
estimateMessagesTokens,
|
||||||
|
estimateTokens,
|
||||||
|
getContextWindow,
|
||||||
|
} from "../utils/token_utils";
|
||||||
|
import { extractCodebase } from "../../utils/codebase";
|
||||||
|
import { getDyadAppPath } from "../../paths/paths";
|
||||||
|
import { withLock } from "../utils/lock_utils";
|
||||||
const logger = log.scope("proposal_handlers");
|
const logger = log.scope("proposal_handlers");
|
||||||
|
|
||||||
// Placeholder Proposal data (can be removed or kept for reference)
|
// Placeholder Proposal data (can be removed or kept for reference)
|
||||||
@@ -44,41 +50,204 @@ function isParsedProposal(obj: any): obj is ParsedProposal {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * One cached codebase token count, stored per chat.
 */
interface CodebaseTokenCache {
  /** Chat the entry belongs to; also used as the cache map key. */
  chatId: number;
  /** Assistant message the count was computed for; must match for a cache hit. */
  messageId: number;
  /** Content of that message; must match for a cache hit. */
  messageContent: string;
  /** Estimated token count of the extracted codebase. */
  tokenCount: number;
  /** `Date.now()` value recorded for the entry; used for expiration. */
  timestamp: number;
}
|
||||||
|
|
||||||
|
// Cache expiration time (5 minutes)
|
||||||
|
const CACHE_EXPIRATION_MS = 5 * 60 * 1000;
|
||||||
|
|
||||||
|
// In-memory cache for codebase token counts
|
||||||
|
const codebaseTokenCache = new Map<number, CodebaseTokenCache>();
|
||||||
|
|
||||||
|
/**
 * Drop every codebase token cache entry older than CACHE_EXPIRATION_MS and
 * log how many were removed (nothing is logged when none expired).
 */
function cleanupExpiredCacheEntries() {
  const now = Date.now();

  // Collect the stale keys first, then delete them in a second pass.
  const staleKeys = [...codebaseTokenCache]
    .filter(([, entry]) => now - entry.timestamp > CACHE_EXPIRATION_MS)
    .map(([key]) => key);

  for (const key of staleKeys) {
    codebaseTokenCache.delete(key);
  }

  if (staleKeys.length > 0) {
    logger.log(
      `Cleaned up ${staleKeys.length} expired codebase token cache entries`
    );
  }
}
|
||||||
|
|
||||||
|
/**
 * Return the estimated token count of the app's codebase, reusing the cached
 * value for this chat when it is still valid.
 *
 * A cached entry is valid when it was stored for the same message id and the
 * same message content and is younger than CACHE_EXPIRATION_MS. Otherwise the
 * codebase is extracted again, its tokens are estimated, and the result
 * replaces the chat's cache entry.
 *
 * @param chatId Chat whose cache slot is read and written.
 * @param messageId Id of the message the count is associated with.
 * @param messageContent Content of that message.
 * @param appPath App path, resolved through `getDyadAppPath` before extraction.
 * @returns The estimated codebase token count.
 */
async function getCodebaseTokenCount(
  chatId: number,
  messageId: number,
  messageContent: string,
  appPath: string
): Promise<number> {
  // Purge stale entries before consulting the cache.
  cleanupExpiredCacheEntries();

  const cached = codebaseTokenCache.get(chatId);
  const now = Date.now();

  if (cached) {
    const sameMessage =
      cached.messageId === messageId &&
      cached.messageContent === messageContent;
    const unexpired = now - cached.timestamp < CACHE_EXPIRATION_MS;

    if (sameMessage && unexpired) {
      logger.log(`Using cached codebase token count for chatId: ${chatId}`);
      return cached.tokenCount;
    }
  }

  logger.log(`Calculating codebase token count for chatId: ${chatId}`);
  const tokenCount = estimateTokens(
    await extractCodebase(getDyadAppPath(appPath))
  );

  // The stored timestamp is the one captured before extraction started.
  codebaseTokenCache.set(chatId, {
    chatId,
    messageId,
    messageContent,
    tokenCount,
    timestamp: now,
  });

  return tokenCount;
}
|
||||||
|
|
||||||
const getProposalHandler = async (
|
const getProposalHandler = async (
|
||||||
_event: IpcMainInvokeEvent,
|
_event: IpcMainInvokeEvent,
|
||||||
{ chatId }: { chatId: number }
|
{ chatId }: { chatId: number }
|
||||||
): Promise<ProposalResult | null> => {
|
): Promise<ProposalResult | null> => {
|
||||||
logger.log(`IPC: get-proposal called for chatId: ${chatId}`);
|
return withLock("get-proposal:" + chatId, async () => {
|
||||||
|
logger.log(`IPC: get-proposal called for chatId: ${chatId}`);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Find the latest ASSISTANT message for the chat
|
// Find the latest ASSISTANT message for the chat
|
||||||
const latestAssistantMessage = await db.query.messages.findFirst({
|
const latestAssistantMessage = await db.query.messages.findFirst({
|
||||||
where: and(eq(messages.chatId, chatId), eq(messages.role, "assistant")),
|
where: and(eq(messages.chatId, chatId), eq(messages.role, "assistant")),
|
||||||
orderBy: [desc(messages.createdAt)],
|
orderBy: [desc(messages.createdAt)],
|
||||||
columns: {
|
columns: {
|
||||||
id: true, // Fetch the ID
|
id: true, // Fetch the ID
|
||||||
content: true, // Fetch the content to parse
|
content: true, // Fetch the content to parse
|
||||||
approvalState: true,
|
approvalState: true,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
if (
|
if (
|
||||||
latestAssistantMessage?.approvalState === "rejected" ||
|
latestAssistantMessage?.content &&
|
||||||
latestAssistantMessage?.approvalState === "approved"
|
latestAssistantMessage.id &&
|
||||||
) {
|
!latestAssistantMessage?.approvalState
|
||||||
|
) {
|
||||||
|
const messageId = latestAssistantMessage.id; // Get the message ID
|
||||||
|
logger.log(
|
||||||
|
`Found latest assistant message (ID: ${messageId}), parsing content...`
|
||||||
|
);
|
||||||
|
const messageContent = latestAssistantMessage.content;
|
||||||
|
|
||||||
|
const proposalTitle = getDyadChatSummaryTag(messageContent);
|
||||||
|
|
||||||
|
const proposalWriteFiles = getDyadWriteTags(messageContent);
|
||||||
|
const proposalRenameFiles = getDyadRenameTags(messageContent);
|
||||||
|
const proposalDeleteFiles = getDyadDeleteTags(messageContent);
|
||||||
|
const proposalExecuteSqlQueries = getDyadExecuteSqlTags(messageContent);
|
||||||
|
const packagesAdded = getDyadAddDependencyTags(messageContent);
|
||||||
|
|
||||||
|
const filesChanged = [
|
||||||
|
...proposalWriteFiles.map((tag) => ({
|
||||||
|
name: path.basename(tag.path),
|
||||||
|
path: tag.path,
|
||||||
|
summary: tag.description ?? "(no change summary found)", // Generic summary
|
||||||
|
type: "write" as const,
|
||||||
|
isServerFunction: isServerFunction(tag.path),
|
||||||
|
})),
|
||||||
|
...proposalRenameFiles.map((tag) => ({
|
||||||
|
name: path.basename(tag.to),
|
||||||
|
path: tag.to,
|
||||||
|
summary: `Rename from ${tag.from} to ${tag.to}`,
|
||||||
|
type: "rename" as const,
|
||||||
|
isServerFunction: isServerFunction(tag.to),
|
||||||
|
})),
|
||||||
|
...proposalDeleteFiles.map((tag) => ({
|
||||||
|
name: path.basename(tag),
|
||||||
|
path: tag,
|
||||||
|
summary: `Delete file`,
|
||||||
|
type: "delete" as const,
|
||||||
|
isServerFunction: isServerFunction(tag),
|
||||||
|
})),
|
||||||
|
];
|
||||||
|
// Check if we have enough information to create a proposal
|
||||||
|
if (
|
||||||
|
filesChanged.length > 0 ||
|
||||||
|
packagesAdded.length > 0 ||
|
||||||
|
proposalExecuteSqlQueries.length > 0
|
||||||
|
) {
|
||||||
|
const proposal: CodeProposal = {
|
||||||
|
type: "code-proposal",
|
||||||
|
// Use parsed title or a default title if summary tag is missing but write tags exist
|
||||||
|
title: proposalTitle ?? "Proposed File Changes",
|
||||||
|
securityRisks: [], // Keep empty
|
||||||
|
filesChanged,
|
||||||
|
packagesAdded,
|
||||||
|
sqlQueries: proposalExecuteSqlQueries.map((query) => ({
|
||||||
|
content: query.content,
|
||||||
|
description: query.description,
|
||||||
|
})),
|
||||||
|
};
|
||||||
|
logger.log(
|
||||||
|
"Generated code proposal. title=",
|
||||||
|
proposal.title,
|
||||||
|
"files=",
|
||||||
|
proposal.filesChanged.length,
|
||||||
|
"packages=",
|
||||||
|
proposal.packagesAdded.length
|
||||||
|
);
|
||||||
|
|
||||||
|
return {
|
||||||
|
proposal: proposal,
|
||||||
|
chatId,
|
||||||
|
messageId,
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
logger.log(
|
||||||
|
"No relevant tags found in the latest assistant message content."
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
// Get all chat messages to calculate token usage
|
// Get all chat messages to calculate token usage
|
||||||
const chat = await db.query.chats.findFirst({
|
const chat = await db.query.chats.findFirst({
|
||||||
where: eq(chats.id, chatId),
|
where: eq(chats.id, chatId),
|
||||||
with: {
|
with: {
|
||||||
|
app: true,
|
||||||
messages: {
|
messages: {
|
||||||
orderBy: (messages, { asc }) => [asc(messages.createdAt)],
|
orderBy: (messages, { asc }) => [asc(messages.createdAt)],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
if (chat) {
|
if (latestAssistantMessage && chat) {
|
||||||
// Calculate total tokens from message history
|
// Calculate total tokens from message history
|
||||||
const totalTokens = estimateMessagesTokens(chat.messages);
|
const messagesTokenCount = estimateMessagesTokens(chat.messages);
|
||||||
|
|
||||||
|
// Use cached token count or calculate new one
|
||||||
|
const codebaseTokenCount = await getCodebaseTokenCount(
|
||||||
|
chatId,
|
||||||
|
latestAssistantMessage.id,
|
||||||
|
latestAssistantMessage.content || "",
|
||||||
|
chat.app.path
|
||||||
|
);
|
||||||
|
|
||||||
|
const totalTokens = messagesTokenCount + codebaseTokenCount;
|
||||||
const contextWindow = Math.min(getContextWindow(), 100_000);
|
const contextWindow = Math.min(getContextWindow(), 100_000);
|
||||||
logger.log(
|
logger.log(
|
||||||
`Token usage: ${totalTokens}/${contextWindow} (${
|
`Token usage: ${totalTokens}/${contextWindow} (${
|
||||||
@@ -102,92 +271,11 @@ const getProposalHandler = async (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
|
} catch (error) {
|
||||||
|
logger.error(`Error processing proposal for chatId ${chatId}:`, error);
|
||||||
|
return null; // Indicate DB or processing error
|
||||||
}
|
}
|
||||||
|
});
|
||||||
if (latestAssistantMessage?.content && latestAssistantMessage.id) {
|
|
||||||
const messageId = latestAssistantMessage.id; // Get the message ID
|
|
||||||
logger.log(
|
|
||||||
`Found latest assistant message (ID: ${messageId}), parsing content...`
|
|
||||||
);
|
|
||||||
const messageContent = latestAssistantMessage.content;
|
|
||||||
|
|
||||||
const proposalTitle = getDyadChatSummaryTag(messageContent);
|
|
||||||
|
|
||||||
const proposalWriteFiles = getDyadWriteTags(messageContent);
|
|
||||||
const proposalRenameFiles = getDyadRenameTags(messageContent);
|
|
||||||
const proposalDeleteFiles = getDyadDeleteTags(messageContent);
|
|
||||||
const proposalExecuteSqlQueries = getDyadExecuteSqlTags(messageContent);
|
|
||||||
const packagesAdded = getDyadAddDependencyTags(messageContent);
|
|
||||||
|
|
||||||
const filesChanged = [
|
|
||||||
...proposalWriteFiles.map((tag) => ({
|
|
||||||
name: path.basename(tag.path),
|
|
||||||
path: tag.path,
|
|
||||||
summary: tag.description ?? "(no change summary found)", // Generic summary
|
|
||||||
type: "write" as const,
|
|
||||||
isServerFunction: isServerFunction(tag.path),
|
|
||||||
})),
|
|
||||||
...proposalRenameFiles.map((tag) => ({
|
|
||||||
name: path.basename(tag.to),
|
|
||||||
path: tag.to,
|
|
||||||
summary: `Rename from ${tag.from} to ${tag.to}`,
|
|
||||||
type: "rename" as const,
|
|
||||||
isServerFunction: isServerFunction(tag.to),
|
|
||||||
})),
|
|
||||||
...proposalDeleteFiles.map((tag) => ({
|
|
||||||
name: path.basename(tag),
|
|
||||||
path: tag,
|
|
||||||
summary: `Delete file`,
|
|
||||||
type: "delete" as const,
|
|
||||||
isServerFunction: isServerFunction(tag),
|
|
||||||
})),
|
|
||||||
];
|
|
||||||
// Check if we have enough information to create a proposal
|
|
||||||
if (
|
|
||||||
filesChanged.length > 0 ||
|
|
||||||
packagesAdded.length > 0 ||
|
|
||||||
proposalExecuteSqlQueries.length > 0
|
|
||||||
) {
|
|
||||||
const proposal: CodeProposal = {
|
|
||||||
type: "code-proposal",
|
|
||||||
// Use parsed title or a default title if summary tag is missing but write tags exist
|
|
||||||
title: proposalTitle ?? "Proposed File Changes",
|
|
||||||
securityRisks: [], // Keep empty
|
|
||||||
filesChanged,
|
|
||||||
packagesAdded,
|
|
||||||
sqlQueries: proposalExecuteSqlQueries.map((query) => ({
|
|
||||||
content: query.content,
|
|
||||||
description: query.description,
|
|
||||||
})),
|
|
||||||
};
|
|
||||||
logger.log(
|
|
||||||
"Generated code proposal. title=",
|
|
||||||
proposal.title,
|
|
||||||
"files=",
|
|
||||||
proposal.filesChanged.length,
|
|
||||||
"packages=",
|
|
||||||
proposal.packagesAdded.length
|
|
||||||
);
|
|
||||||
|
|
||||||
return {
|
|
||||||
proposal: proposal,
|
|
||||||
chatId,
|
|
||||||
messageId,
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
logger.log(
|
|
||||||
"No relevant tags found in the latest assistant message content."
|
|
||||||
);
|
|
||||||
return null; // No proposal could be generated
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
logger.log(`No assistant message found for chatId: ${chatId}`);
|
|
||||||
return null; // No message found
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
logger.error(`Error processing proposal for chatId ${chatId}:`, error);
|
|
||||||
return null; // Indicate DB or processing error
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Handler to approve a proposal (process actions and update message)
|
// Handler to approve a proposal (process actions and update message)
|
||||||
|
|||||||
@@ -1,6 +1,10 @@
|
|||||||
import fs from "node:fs";
|
import fs from "node:fs";
|
||||||
|
import fsAsync from "node:fs/promises";
|
||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
import { isIgnored } from "isomorphic-git";
|
import { isIgnored } from "isomorphic-git";
|
||||||
|
import log from "electron-log";
|
||||||
|
|
||||||
|
const logger = log.scope("utils/codebase");
|
||||||
|
|
||||||
// File extensions to include in the extraction
|
// File extensions to include in the extraction
|
||||||
const ALLOWED_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".css", ".html"];
|
const ALLOWED_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".css", ".html"];
|
||||||
@@ -14,6 +18,23 @@ const ALWAYS_INCLUDE_FILES = ["package.json"];
|
|||||||
// Maximum file size to include (in bytes) - 100KB
|
// Maximum file size to include (in bytes) - 100KB
|
||||||
const MAX_FILE_SIZE = 100 * 1024;
|
const MAX_FILE_SIZE = 100 * 1024;
|
||||||
|
|
||||||
|
// Maximum size for fileContentCache
|
||||||
|
const MAX_FILE_CACHE_SIZE = 500;
|
||||||
|
|
||||||
|
/**
 * Cached contents of one file together with the modification time they were
 * read at.
 */
type FileCache = {
  /** File contents. */
  content: string;
  /** Modification time recorded alongside the contents. */
  mtime: number;
};
|
||||||
|
|
||||||
|
// Cache for file contents
|
||||||
|
const fileContentCache = new Map<string, FileCache>();
|
||||||
|
|
||||||
|
// Cache for git ignored paths
|
||||||
|
const gitIgnoreCache = new Map<string, boolean>();
|
||||||
|
// Map to store .gitignore file paths and their modification times
|
||||||
|
const gitIgnoreMtimes = new Map<string, number>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if a path should be ignored based on git ignore rules
|
* Check if a path should be ignored based on git ignore rules
|
||||||
*/
|
*/
|
||||||
@@ -22,14 +43,108 @@ async function isGitIgnored(
|
|||||||
baseDir: string
|
baseDir: string
|
||||||
): Promise<boolean> {
|
): Promise<boolean> {
|
||||||
try {
|
try {
|
||||||
|
// Check if any relevant .gitignore has been modified
|
||||||
|
// Git checks .gitignore files in the path from the repo root to the file
|
||||||
|
let currentDir = baseDir;
|
||||||
|
const pathParts = path.relative(baseDir, filePath).split(path.sep);
|
||||||
|
let shouldClearCache = false;
|
||||||
|
|
||||||
|
// Check root .gitignore
|
||||||
|
const rootGitIgnorePath = path.join(baseDir, ".gitignore");
|
||||||
|
try {
|
||||||
|
const stats = await fsAsync.stat(rootGitIgnorePath);
|
||||||
|
const lastMtime = gitIgnoreMtimes.get(rootGitIgnorePath) || 0;
|
||||||
|
if (stats.mtimeMs > lastMtime) {
|
||||||
|
gitIgnoreMtimes.set(rootGitIgnorePath, stats.mtimeMs);
|
||||||
|
shouldClearCache = true;
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
// Root .gitignore might not exist, which is fine
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check .gitignore files in parent directories
|
||||||
|
for (let i = 0; i < pathParts.length - 1; i++) {
|
||||||
|
currentDir = path.join(currentDir, pathParts[i]);
|
||||||
|
const gitIgnorePath = path.join(currentDir, ".gitignore");
|
||||||
|
|
||||||
|
try {
|
||||||
|
const stats = await fsAsync.stat(gitIgnorePath);
|
||||||
|
const lastMtime = gitIgnoreMtimes.get(gitIgnorePath) || 0;
|
||||||
|
if (stats.mtimeMs > lastMtime) {
|
||||||
|
gitIgnoreMtimes.set(gitIgnorePath, stats.mtimeMs);
|
||||||
|
shouldClearCache = true;
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
// This directory might not have a .gitignore, which is fine
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear cache if any .gitignore was modified
|
||||||
|
if (shouldClearCache) {
|
||||||
|
gitIgnoreCache.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
const cacheKey = `${baseDir}:${filePath}`;
|
||||||
|
|
||||||
|
if (gitIgnoreCache.has(cacheKey)) {
|
||||||
|
return gitIgnoreCache.get(cacheKey)!;
|
||||||
|
}
|
||||||
|
|
||||||
const relativePath = path.relative(baseDir, filePath);
|
const relativePath = path.relative(baseDir, filePath);
|
||||||
return await isIgnored({ fs, dir: baseDir, filepath: relativePath });
|
const result = await isIgnored({
|
||||||
|
fs,
|
||||||
|
dir: baseDir,
|
||||||
|
filepath: relativePath,
|
||||||
|
});
|
||||||
|
|
||||||
|
gitIgnoreCache.set(cacheKey, result);
|
||||||
|
return result;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`Error checking if path is git ignored: ${filePath}`, error);
|
logger.error(`Error checking if path is git ignored: ${filePath}`, error);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Read a file as UTF-8 text, serving it from `fileContentCache` when the
 * file's modification time has not changed since it was cached.
 *
 * When the cache grows beyond MAX_FILE_CACHE_SIZE, the oldest 25% of entries
 * (by insertion order) are evicted.
 *
 * @param filePath Path of the file to read.
 * @returns The file contents, or `null` if the file could not be stat'ed or
 *   read (the error is logged).
 */
async function readFileWithCache(filePath: string): Promise<string | null> {
  try {
    const { mtimeMs: currentMtime } = await fsAsync.stat(filePath);

    // Single lookup instead of has() + get()!.
    const cached = fileContentCache.get(filePath);
    if (cached !== undefined && cached.mtime === currentMtime) {
      return cached.content;
    }

    const content = await fsAsync.readFile(filePath, "utf-8");

    // Map.set() on an existing key keeps the key's original insertion
    // position. Delete first so a refreshed entry becomes the newest one and
    // is not evicted ahead of entries that are genuinely older.
    fileContentCache.delete(filePath);
    fileContentCache.set(filePath, { content, mtime: currentMtime });

    if (fileContentCache.size > MAX_FILE_CACHE_SIZE) {
      // Evict the oldest 25% of entries; Map iterates in insertion order, and
      // deleting during iteration is safe.
      let remaining = Math.ceil(MAX_FILE_CACHE_SIZE * 0.25);
      for (const key of fileContentCache.keys()) {
        if (remaining <= 0) {
          break;
        }
        fileContentCache.delete(key);
        remaining--;
      }
    }

    return content;
  } catch (error) {
    logger.error(`Error reading file: ${filePath}`, error);
    return null;
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Recursively walk a directory and collect all relevant files
|
* Recursively walk a directory and collect all relevant files
|
||||||
*/
|
*/
|
||||||
@@ -37,25 +152,29 @@ async function collectFiles(dir: string, baseDir: string): Promise<string[]> {
|
|||||||
const files: string[] = [];
|
const files: string[] = [];
|
||||||
|
|
||||||
// Check if directory exists
|
// Check if directory exists
|
||||||
if (!fs.existsSync(dir)) {
|
try {
|
||||||
|
await fsAsync.access(dir);
|
||||||
|
} catch {
|
||||||
|
// Directory doesn't exist or is not accessible
|
||||||
return files;
|
return files;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Read directory contents
|
// Read directory contents
|
||||||
const entries = fs.readdirSync(dir, { withFileTypes: true });
|
const entries = await fsAsync.readdir(dir, { withFileTypes: true });
|
||||||
|
|
||||||
for (const entry of entries) {
|
// Process entries concurrently
|
||||||
|
const promises = entries.map(async (entry) => {
|
||||||
const fullPath = path.join(dir, entry.name);
|
const fullPath = path.join(dir, entry.name);
|
||||||
|
|
||||||
// Skip excluded directories
|
// Skip excluded directories
|
||||||
if (entry.isDirectory() && EXCLUDED_DIRS.includes(entry.name)) {
|
if (entry.isDirectory() && EXCLUDED_DIRS.includes(entry.name)) {
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip if the entry is git ignored
|
// Skip if the entry is git ignored
|
||||||
if (await isGitIgnored(fullPath, baseDir)) {
|
if (await isGitIgnored(fullPath, baseDir)) {
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (entry.isDirectory()) {
|
if (entry.isDirectory()) {
|
||||||
@@ -69,22 +188,24 @@ async function collectFiles(dir: string, baseDir: string): Promise<string[]> {
|
|||||||
|
|
||||||
// Skip files that are too large
|
// Skip files that are too large
|
||||||
try {
|
try {
|
||||||
const stats = fs.statSync(fullPath);
|
const stats = await fsAsync.stat(fullPath);
|
||||||
if (stats.size > MAX_FILE_SIZE) {
|
if (stats.size > MAX_FILE_SIZE) {
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`Error checking file size: ${fullPath}`, error);
|
logger.error(`Error checking file size: ${fullPath}`, error);
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ALLOWED_EXTENSIONS.includes(ext) || shouldAlwaysInclude) {
|
if (ALLOWED_EXTENSIONS.includes(ext) || shouldAlwaysInclude) {
|
||||||
files.push(fullPath);
|
files.push(fullPath);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
|
|
||||||
|
await Promise.all(promises);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`Error reading directory ${dir}:`, error);
|
logger.error(`Error reading directory ${dir}:`, error);
|
||||||
}
|
}
|
||||||
|
|
||||||
return files;
|
return files;
|
||||||
@@ -93,7 +214,7 @@ async function collectFiles(dir: string, baseDir: string): Promise<string[]> {
|
|||||||
/**
|
/**
|
||||||
* Format a file for inclusion in the codebase extract
|
* Format a file for inclusion in the codebase extract
|
||||||
*/
|
*/
|
||||||
function formatFile(filePath: string, baseDir: string): string {
|
async function formatFile(filePath: string, baseDir: string): Promise<string> {
|
||||||
try {
|
try {
|
||||||
const relativePath = path.relative(baseDir, filePath);
|
const relativePath = path.relative(baseDir, filePath);
|
||||||
|
|
||||||
@@ -114,7 +235,15 @@ function formatFile(filePath: string, baseDir: string): string {
|
|||||||
`;
|
`;
|
||||||
}
|
}
|
||||||
|
|
||||||
const content = fs.readFileSync(filePath, "utf-8");
|
const content = await readFileWithCache(filePath);
|
||||||
|
|
||||||
|
if (content === null) {
|
||||||
|
return `<dyad-file path="${relativePath}">
|
||||||
|
// Error reading file
|
||||||
|
</dyad-file>
|
||||||
|
|
||||||
|
`;
|
||||||
|
}
|
||||||
|
|
||||||
return `<dyad-file path="${relativePath}">
|
return `<dyad-file path="${relativePath}">
|
||||||
${content}
|
${content}
|
||||||
@@ -122,7 +251,7 @@ ${content}
|
|||||||
|
|
||||||
`;
|
`;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`Error reading file: ${filePath}`, error);
|
logger.error(`Error reading file: ${filePath}`, error);
|
||||||
return `<dyad-file path="${path.relative(baseDir, filePath)}">
|
return `<dyad-file path="${path.relative(baseDir, filePath)}">
|
||||||
// Error reading file: ${error}
|
// Error reading file: ${error}
|
||||||
</dyad-file>
|
</dyad-file>
|
||||||
@@ -137,26 +266,53 @@ ${content}
|
|||||||
* @returns A string containing formatted file contents
|
* @returns A string containing formatted file contents
|
||||||
*/
|
*/
|
||||||
export async function extractCodebase(appPath: string): Promise<string> {
|
export async function extractCodebase(appPath: string): Promise<string> {
|
||||||
if (!fs.existsSync(appPath)) {
|
try {
|
||||||
return `# Error: Directory ${appPath} does not exist`;
|
await fsAsync.access(appPath);
|
||||||
|
} catch {
|
||||||
|
return `# Error: Directory ${appPath} does not exist or is not accessible`;
|
||||||
}
|
}
|
||||||
|
const startTime = Date.now();
|
||||||
|
|
||||||
// Collect all relevant files
|
// Collect all relevant files
|
||||||
const files = await collectFiles(appPath, appPath);
|
const files = await collectFiles(appPath, appPath);
|
||||||
|
|
||||||
// Sort files to prioritize important files
|
// Sort files by modification time (oldest first)
|
||||||
const sortedFiles = sortFilesByImportance(files, appPath);
|
// This is important for cache-ability.
|
||||||
|
const sortedFiles = await sortFilesByModificationTime(files);
|
||||||
|
|
||||||
// Format files
|
// Format files
|
||||||
let output = "";
|
let output = "";
|
||||||
|
const formatPromises = sortedFiles.map((file) => formatFile(file, appPath));
|
||||||
|
const formattedFiles = await Promise.all(formatPromises);
|
||||||
|
output = formattedFiles.join("");
|
||||||
|
|
||||||
for (const file of sortedFiles) {
|
const endTime = Date.now();
|
||||||
output += formatFile(file, appPath);
|
logger.log("extractCodebase: time taken", endTime - startTime);
|
||||||
}
|
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Sort files by their modification timestamp (oldest first).
 *
 * Files with identical timestamps are ordered by path, so the result does not
 * depend on the order of the input array.
 *
 * @param files Paths of the files to sort; the array itself is not mutated.
 * @returns A new array of the same paths, oldest first. A file whose stats
 *   cannot be read is logged and sorted as if modified "now".
 */
async function sortFilesByModificationTime(files: string[]): Promise<string[]> {
  const stamped = await Promise.all(
    files.map(async (file) => {
      try {
        const { mtimeMs } = await fsAsync.stat(file);
        return { file, mtime: mtimeMs };
      } catch (error) {
        // If there's an error getting stats, use current time as fallback
        logger.error(`Error getting file stats for ${file}:`, error);
        return { file, mtime: Date.now() };
      }
    })
  );

  stamped.sort((a, b) => {
    if (a.mtime !== b.mtime) {
      return a.mtime - b.mtime;
    }
    // Deterministic tie-break: plain code-unit comparison of the paths.
    if (a.file < b.file) {
      return -1;
    }
    return a.file > b.file ? 1 : 0;
  });

  return stamped.map((item) => item.file);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sort files by their importance for context
|
* Sort files by their importance for context
|
||||||
*/
|
*/
|
||||||
|
|||||||
Reference in New Issue
Block a user