Files
moreminimore-vibe/src/utils/codebase.ts
2025-05-01 14:57:26 -07:00

365 lines
10 KiB
TypeScript

import fs from "node:fs";
import fsAsync from "node:fs/promises";
import path from "node:path";
import { isIgnored } from "isomorphic-git";
import log from "electron-log";
// Logger scoped to this module so its messages can be told apart in the log output.
const logger = log.scope("utils/codebase");
// File extensions to include in the extraction.
// Entries are lower-case with a leading dot; they are compared against the
// lower-cased extension of each candidate file.
const ALLOWED_EXTENSIONS = [
".ts",
".tsx",
".js",
".jsx",
".mjs",
".cjs",
".mts",
".cts",
".css",
".html",
".md",
];
// Directories to always exclude. Matched against the directory's own name
// (not its full path), so a match at any depth is skipped.
const EXCLUDED_DIRS = ["node_modules", ".git", "dist", "build"];
// Files to always include, regardless of extension. Matched against the exact
// file name; the size limit below still applies to them.
const ALWAYS_INCLUDE_FILES = ["package.json"];
// Maximum file size to include (in bytes) - 100KB. Larger files are skipped.
const MAX_FILE_SIZE = 100 * 1024;
// Maximum number of entries kept in fileContentCache before the oldest
// entries are evicted.
const MAX_FILE_CACHE_SIZE = 500;
// A cached file body together with the modification time (in milliseconds,
// as reported by fs stat) it was read at.
type FileCache = {
content: string;
mtime: number;
};
// Cache for file contents, keyed by the file path passed to the reader.
const fileContentCache = new Map<string, FileCache>();
// Cache for git ignored paths, keyed by "<baseDir>:<filePath>".
const gitIgnoreCache = new Map<string, boolean>();
// Map from .gitignore file path to the modification time (in milliseconds)
// last observed for it; used to decide when gitIgnoreCache must be cleared.
const gitIgnoreMtimes = new Map<string, number>();
/**
 * Check if a path should be ignored based on git ignore rules.
 *
 * Results are memoised in `gitIgnoreCache`. Before the cache is consulted,
 * every `.gitignore` that can affect the path (the one in `baseDir` plus one
 * per directory between `baseDir` and the file) is stat'ed; if any of them
 * appeared, disappeared, or changed its modification time since the last
 * check, the whole cache is dropped.
 *
 * @param filePath - Path of the file or directory to test.
 * @param baseDir - Root directory the ignore rules are evaluated against.
 * @returns `true` when the path is ignored; `false` when it is not, or when
 *   the check itself fails (the failure is logged).
 */
async function isGitIgnored(
  filePath: string,
  baseDir: string
): Promise<boolean> {
  try {
    const pathParts = path.relative(baseDir, filePath).split(path.sep);

    // Git consults .gitignore files on the way from the root down to the file:
    // the root one first, then one per intermediate directory.
    const gitIgnorePaths = [path.join(baseDir, ".gitignore")];
    let currentDir = baseDir;
    for (const part of pathParts.slice(0, -1)) {
      currentDir = path.join(currentDir, part);
      gitIgnorePaths.push(path.join(currentDir, ".gitignore"));
    }

    let shouldClearCache = false;
    for (const gitIgnorePath of gitIgnorePaths) {
      let currentMtime: number | undefined;
      try {
        currentMtime = (await fsAsync.stat(gitIgnorePath)).mtimeMs;
      } catch {
        // This location has no (readable) .gitignore, which is fine.
        currentMtime = undefined;
      }

      // Read the recorded value only after the await so that concurrent
      // callers observe each other's updates instead of a stale snapshot.
      const knownMtime = gitIgnoreMtimes.get(gitIgnorePath);
      if (currentMtime === knownMtime) {
        continue;
      }

      // Any difference invalidates cached answers: a new file, a deleted
      // file, or an mtime that moved in either direction.
      if (currentMtime === undefined) {
        gitIgnoreMtimes.delete(gitIgnorePath);
      } else {
        gitIgnoreMtimes.set(gitIgnorePath, currentMtime);
      }
      shouldClearCache = true;
    }

    if (shouldClearCache) {
      gitIgnoreCache.clear();
    }

    const cacheKey = `${baseDir}:${filePath}`;
    const cached = gitIgnoreCache.get(cacheKey);
    if (cached !== undefined) {
      return cached;
    }

    const result = await isIgnored({
      fs,
      dir: baseDir,
      // isomorphic-git works with "/"-separated paths, so convert the
      // platform-specific relative path before handing it over.
      filepath: pathParts.join("/"),
    });
    gitIgnoreCache.set(cacheKey, result);
    return result;
  } catch (error) {
    logger.error(`Error checking if path is git ignored: ${filePath}`, error);
    return false;
  }
}
/**
 * Return the UTF-8 contents of a file, reusing the cached copy when the
 * file's modification time matches the one recorded in the cache.
 *
 * @param filePath - Path of the file to read.
 * @returns The file contents, or `null` when the file cannot be stat'ed or
 *   read (the failure is logged).
 */
async function readFileWithCache(filePath: string): Promise<string | null> {
  try {
    // The modification time decides whether the cached copy is still valid.
    const { mtimeMs } = await fsAsync.stat(filePath);

    const cachedEntry = fileContentCache.get(filePath);
    if (cachedEntry !== undefined && cachedEntry.mtime === mtimeMs) {
      return cachedEntry.content;
    }

    // Cache miss or stale entry: read from disk and remember the result.
    const content = await fsAsync.readFile(filePath, "utf-8");
    fileContentCache.set(filePath, { content, mtime: mtimeMs });

    // Keep the cache bounded: once it grows past the limit, drop the first
    // 25% (of the limit) of keys in insertion order.
    if (fileContentCache.size > MAX_FILE_CACHE_SIZE) {
      const evictionCount = Math.ceil(MAX_FILE_CACHE_SIZE * 0.25);
      Array.from(fileContentCache.keys())
        .slice(0, evictionCount)
        .forEach((staleKey) => {
          fileContentCache.delete(staleKey);
        });
    }

    return content;
  } catch (error) {
    logger.error(`Error reading file: ${filePath}`, error);
    return null;
  }
}
/**
 * Recursively walk a directory and collect all relevant files.
 *
 * A file is relevant when its extension is in `ALLOWED_EXTENSIONS` or its
 * name is in `ALWAYS_INCLUDE_FILES`, it is not git-ignored, and it is no
 * larger than `MAX_FILE_SIZE`. Directories named in `EXCLUDED_DIRS` and
 * git-ignored directories are not descended into.
 *
 * Entries are processed concurrently, but the result order is deterministic:
 * entries are visited in name order and each entry's results are kept in
 * that order, independent of which I/O finishes first.
 *
 * @param dir - Directory to walk.
 * @param baseDir - Root directory used for git ignore evaluation.
 * @returns Full paths of all relevant files under `dir`; empty when `dir` is
 *   missing, inaccessible, or cannot be read.
 */
async function collectFiles(dir: string, baseDir: string): Promise<string[]> {
  // A missing or inaccessible directory is not an error worth logging.
  try {
    await fsAsync.access(dir);
  } catch {
    return [];
  }

  try {
    const entries = await fsAsync.readdir(dir, { withFileTypes: true });
    // readdir gives no ordering guarantee; sort so repeated runs agree.
    entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

    // Each entry resolves to its own list so that concurrency cannot
    // reorder the combined result.
    const perEntry = await Promise.all(
      entries.map(async (entry): Promise<string[]> => {
        const fullPath = path.join(dir, entry.name);

        if (entry.isDirectory()) {
          if (EXCLUDED_DIRS.includes(entry.name)) {
            return [];
          }
          if (await isGitIgnored(fullPath, baseDir)) {
            return [];
          }
          return collectFiles(fullPath, baseDir);
        }

        if (!entry.isFile()) {
          return [];
        }

        // Cheapest filter first: no I/O is spent on files that could never
        // be included.
        const ext = path.extname(entry.name).toLowerCase();
        const isCandidate =
          ALLOWED_EXTENSIONS.includes(ext) ||
          ALWAYS_INCLUDE_FILES.includes(entry.name);
        if (!isCandidate) {
          return [];
        }

        if (await isGitIgnored(fullPath, baseDir)) {
          return [];
        }

        // Skip files that are too large.
        try {
          const stats = await fsAsync.stat(fullPath);
          if (stats.size > MAX_FILE_SIZE) {
            return [];
          }
        } catch (error) {
          logger.error(`Error checking file size: ${fullPath}`, error);
          return [];
        }

        return [fullPath];
      })
    );

    // Flatten with plain loops; spreading a very large array into push()
    // can exceed the engine's argument limit.
    const files: string[] = [];
    for (const group of perEntry) {
      for (const file of group) {
        files.push(file);
      }
    }
    return files;
  } catch (error) {
    logger.error(`Error reading directory ${dir}:`, error);
    return [];
  }
}
/**
 * Render one file as a `<dyad-file>` element for the codebase extract.
 *
 * Files whose relative path contains one of a fixed set of markers (UI
 * component directory, lint/TypeScript config, lock file, `.env`) are emitted
 * with a placeholder instead of their contents.
 *
 * @param filePath - Path of the file to render.
 * @param baseDir - Directory the displayed path is made relative to.
 * @returns The rendered element; read failures are rendered as a placeholder
 *   comment rather than thrown.
 */
async function formatFile(filePath: string, baseDir: string): Promise<string> {
  // Single place that defines the element layout shared by every outcome.
  const wrap = (displayPath: string, body: string): string =>
    `<dyad-file path="${displayPath}">\n${body}\n</dyad-file>\n`;

  try {
    const relativePath = path.relative(baseDir, filePath);

    // Large configuration files or generated code: only the path is emitted.
    // The ".env" marker is a safety net; such files should already have been
    // filtered out by extension.
    const omittedMarkers = [
      path.join("src", "components", "ui"),
      "eslint.config",
      "tsconfig.json",
      "package-lock.json",
      ".env",
    ];
    const isOmitted = omittedMarkers.some((marker) =>
      relativePath.includes(marker)
    );
    if (isOmitted) {
      return wrap(relativePath, "// Contents omitted for brevity");
    }

    const content = await readFileWithCache(filePath);
    const body = content === null ? "// Error reading file" : content;
    return wrap(relativePath, body);
  } catch (error) {
    logger.error(`Error reading file: ${filePath}`, error);
    return wrap(
      path.relative(baseDir, filePath),
      `// Error reading file: ${error}`
    );
  }
}
/**
 * Build the prompt-ready text for a codebase: collect the relevant files,
 * order them by modification time (oldest first), and concatenate their
 * formatted representations.
 *
 * @param appPath - Path to the codebase to extract
 * @returns The concatenated formatted files, or an error line when `appPath`
 *   does not exist or is not accessible
 */
export async function extractCodebase(appPath: string): Promise<string> {
  try {
    await fsAsync.access(appPath);
  } catch {
    return `# Error: Directory ${appPath} does not exist or is not accessible`;
  }

  const startedAt = Date.now();

  const collected = await collectFiles(appPath, appPath);

  // Oldest first: this ordering is important for cache-ability.
  const ordered = await sortFilesByModificationTime(collected);

  const sections = await Promise.all(
    ordered.map((filePath) => formatFile(filePath, appPath))
  );
  const output = sections.join("");

  logger.log("extractCodebase: time taken", Date.now() - startedAt);
  return output;
}
/**
 * Order files by their modification timestamp, oldest first.
 *
 * @param files - Paths of the files to order.
 * @returns A new array with the same paths in ascending modification time.
 *   A file whose stats cannot be read is logged and treated as modified "now".
 */
async function sortFilesByModificationTime(files: string[]): Promise<string[]> {
  // Pair a path with its mtime, falling back to the current time on failure.
  const withTimestamp = async (
    file: string
  ): Promise<{ file: string; mtime: number }> => {
    try {
      const { mtimeMs } = await fsAsync.stat(file);
      return { file, mtime: mtimeMs };
    } catch (error) {
      logger.error(`Error getting file stats for ${file}:`, error);
      return { file, mtime: Date.now() };
    }
  };

  // Stat every file concurrently.
  const stamped = await Promise.all(files.map((file) => withTimestamp(file)));

  stamped.sort((left, right) => left.mtime - right.mtime);
  return stamped.map(({ file }) => file);
}
/**
 * Sort files by their importance for context.
 *
 * High-priority files come first: every name in `ALWAYS_INCLUDE_FILES`, plus
 * `tsconfig.json`, `README.md`, and `index`/`main`/`app` entry points. Within
 * each priority group files are ordered alphabetically by their path relative
 * to `baseDir`.
 *
 * @param files - Paths of the files to sort; the array is not modified.
 * @param baseDir - Directory the paths are made relative to before matching.
 * @returns A new, sorted array of the same paths.
 */
function sortFilesByImportance(files: string[], baseDir: string): string[] {
  // File names are literal text, so neutralise regex metacharacters
  // (e.g. the "." in "package.json") before embedding them in a pattern.
  const escapeRegExp = (text: string): string =>
    text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  const highPriorityPatterns = [
    ...ALWAYS_INCLUDE_FILES.map(
      (name) => new RegExp(`(^|/)${escapeRegExp(name)}$`)
    ),
    /tsconfig\.json$/,
    /README\.md$/,
    /index\.(ts|js)x?$/,
    /main\.(ts|js)x?$/,
    /app\.(ts|js)x?$/,
  ];

  // Classify each file once up front instead of inside every comparison.
  const ranked = files.map((file) => {
    const relative = path.relative(baseDir, file);
    // The patterns anchor on "/", so match against a "/"-separated form even
    // on platforms whose native separator differs.
    const normalized = relative.split(path.sep).join("/");
    const isHighPriority = highPriorityPatterns.some((pattern) =>
      pattern.test(normalized)
    );
    return { file, relative, isHighPriority };
  });

  ranked.sort((a, b) => {
    // Sort by priority first.
    if (a.isHighPriority !== b.isHighPriority) {
      return a.isHighPriority ? -1 : 1;
    }
    // Same priority: sort alphabetically.
    return a.relative.localeCompare(b.relative);
  });

  return ranked.map((item) => item.file);
}