diff --git a/package-lock.json b/package-lock.json index ce1130c..d9a902d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "dyad", - "version": "0.25.0-beta.1", + "version": "0.27.0-beta.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "dyad", - "version": "0.25.0-beta.1", + "version": "0.27.0-beta.1", "license": "MIT", "dependencies": { "@ai-sdk/amazon-bedrock": "^3.0.15", @@ -62,6 +62,7 @@ "electron-playwright-helpers": "^1.7.1", "electron-squirrel-startup": "^1.0.1", "esbuild-register": "^3.6.0", + "fastest-levenshtein": "^1.0.16", "fix-path": "^4.0.0", "framer-motion": "^12.6.3", "geist": "^1.3.1", @@ -11830,6 +11831,15 @@ "dev": true, "license": "MIT" }, + "node_modules/fastest-levenshtein": { + "version": "1.0.16", + "resolved": "https://registry.npmjs.org/fastest-levenshtein/-/fastest-levenshtein-1.0.16.tgz", + "integrity": "sha512-eRnCtTTtGZFpQCwhJiUOuxPQWRXVKYDn0b2PeHfXL6/Zi53SLAzAHfVhVWK2AryC/WH05kGfxhFIPvTF0SXQzg==", + "license": "MIT", + "engines": { + "node": ">= 4.9.1" + } + }, "node_modules/fastq": { "version": "1.19.1", "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.19.1.tgz", diff --git a/package.json b/package.json index c2f4489..3c1104f 100644 --- a/package.json +++ b/package.json @@ -138,6 +138,7 @@ "electron-playwright-helpers": "^1.7.1", "electron-squirrel-startup": "^1.0.1", "esbuild-register": "^3.6.0", + "fastest-levenshtein": "^1.0.16", "fix-path": "^4.0.0", "framer-motion": "^12.6.3", "geist": "^1.3.1", diff --git a/src/pro/main/ipc/processors/search_replace_processor.test.ts b/src/pro/main/ipc/processors/search_replace_processor.test.ts new file mode 100644 index 0000000..6b44b4c --- /dev/null +++ b/src/pro/main/ipc/processors/search_replace_processor.test.ts @@ -0,0 +1,169 @@ +import { describe, it, expect } from "vitest"; +import { applySearchReplace } from "./search_replace_processor"; + +describe("applySearchReplace", () => { + describe("fuzzy matching with Levenshtein distance", () => { + it("should match content with minor typos", () => { + const originalContent = `function hello() { + console.log("Hello, World!"); + return true; +}`; + + // Search block has a typo: "consle" instead of "console" + const diffContent = `<<<<<<< SEARCH +function hello() { + consle.log("Hello, World!"); + return true; +} +======= +function hello() { + console.log("Hello, Universe!"); + return true; +} +>>>>>>> REPLACE`; + + const result = applySearchReplace(originalContent, diffContent); + expect(result.success).toBe(true); + expect(result.content).toContain("Hello, Universe!"); + }); + + it("should match content with smart quotes normalized", () => { + const originalContent = `function greet() { + console.log("Hello"); +}`; + + // Search block uses smart quotes + const diffContent = `<<<<<<< SEARCH +function greet() { + console.log("Hello"); +} +======= +function greet() { + console.log("Goodbye"); +} +>>>>>>> REPLACE`; + + const result = applySearchReplace(originalContent, diffContent); + expect(result.success).toBe(true); + expect(result.content).toContain("Goodbye"); + }); + + it("should fail when similarity is below threshold", () => { + const originalContent = `function hello() { + console.log("Hello, World!"); + return true; +}`; + + // Search block is too different (multiple typos and changes) + const diffContent = `<<<<<<< SEARCH +function goodbye() { + consle.error("Bye, Earth!"); + return false; +} +======= +function hello() { + console.log("Hello, Universe!"); + return true; +} +>>>>>>> REPLACE`; + + const result = applySearchReplace(originalContent, diffContent); + expect(result.success).toBe(false); + expect(result.error).toContain("Best fuzzy match had similarity"); + }); + + it("should prefer exact match over fuzzy match", () => { + const originalContent = `function hello() { + console.log("Hello"); +} + +function hello() { + consle.log("Hello"); +}`; + + // Should match the first exact occurrence, not the fuzzy one + const diffContent = `<<<<<<< SEARCH +function hello() { + console.log("Hello"); +} +======= +function hello() { + console.log("Goodbye"); +} +>>>>>>> REPLACE`; + + const result = applySearchReplace(originalContent, diffContent); + expect(result.success).toBe(true); + // Should only replace the first exact match + expect(result.content).toContain('console.log("Goodbye")'); + expect(result.content).toContain('consle.log("Hello")'); + }); + + it("should handle whitespace differences with lenient matching before fuzzy", () => { + const originalContent = `function test() { + console.log("test"); +}`; + + // Different indentation + const diffContent = `<<<<<<< SEARCH +function test() { + console.log("test"); +} +======= +function test() { + console.log("updated"); +} +>>>>>>> REPLACE`; + + const result = applySearchReplace(originalContent, diffContent); + expect(result.success).toBe(true); + expect(result.content).toContain("updated"); + }); + }); + + describe("existing functionality", () => { + it("should handle exact matches", () => { + const originalContent = `function hello() { + console.log("Hello"); +}`; + + const diffContent = `<<<<<<< SEARCH +function hello() { + console.log("Hello"); +} +======= +function hello() { + console.log("Goodbye"); +} +>>>>>>> REPLACE`; + + const result = applySearchReplace(originalContent, diffContent); + expect(result.success).toBe(true); + expect(result.content).toContain("Goodbye"); + }); + + it("should detect ambiguous matches", () => { + const originalContent = `function hello() { + console.log("Hello"); +} + +function hello() { + console.log("Hello"); +}`; + + const diffContent = `<<<<<<< SEARCH +function hello() { + console.log("Hello"); +} +======= +function hello() { + console.log("Goodbye"); +} +>>>>>>> REPLACE`; + + const result = applySearchReplace(originalContent, diffContent); + expect(result.success).toBe(false); + expect(result.error).toContain("ambiguous"); + }); + }); +}); diff --git a/src/pro/main/ipc/processors/search_replace_processor.ts b/src/pro/main/ipc/processors/search_replace_processor.ts index 934ed30..8b19d74 100644 --- a/src/pro/main/ipc/processors/search_replace_processor.ts +++ b/src/pro/main/ipc/processors/search_replace_processor.ts @@ -1,6 +1,17 @@ /* eslint-disable no-irregular-whitespace */ import { parseSearchReplaceBlocks } from "@/pro/shared/search_replace_parser"; +import { distance } from "fastest-levenshtein"; +import { normalizeString } from "@/utils/text_normalization"; + +// Minimum similarity threshold for fuzzy matching (0 to 1, where 1 is exact match) +const FUZZY_MATCH_THRESHOLD = 0.9; + +// Early termination threshold - stop searching if we find a match this good +const EARLY_STOP_THRESHOLD = 0.95; + +// Maximum time to spend on fuzzy matching (in milliseconds) +const MAX_FUZZY_SEARCH_TIME_MS = 10_000; // 10 seconds function unescapeMarkers(content: string): string { return content @@ -9,6 +20,148 @@ function unescapeMarkers(content: string): string { .replace(/^\\>>>>>>>/gm, ">>>>>>>"); } +/** + * Calculate similarity between two strings using Levenshtein distance + * Returns a value between 0 and 1, where 1 is an exact match + */ +function getSimilarity(original: string, search: string): number { + // Empty searches are no longer supported + if (search === "") { + return 0; + } + + // Use the normalizeString utility to handle smart quotes and other special characters + const normalizedOriginal = normalizeString(original); + const normalizedSearch = normalizeString(search); + + if (normalizedOriginal === normalizedSearch) { + return 1; + } + + // Calculate Levenshtein distance using fastest-levenshtein's distance function + const dist = distance(normalizedOriginal, normalizedSearch); + + // Calculate similarity ratio (0 to 1, where 1 is an exact match) + const maxLength = Math.max( + normalizedOriginal.length, + normalizedSearch.length, + ); + return 1 - dist / maxLength; +} + +/** + * Quick scoring function that counts how many lines exactly match. + * This is much faster than Levenshtein and serves as a good pre-filter. + */ +function quickScoreByExactLines( + targetLines: string[], + searchLines: string[], + startIdx: number, +): number { + let exactMatches = 0; + + for (let i = 0; i < searchLines.length; i++) { + if (startIdx + i >= targetLines.length) break; + + if ( + normalizeString(targetLines[startIdx + i]) === + normalizeString(searchLines[i]) + ) { + exactMatches++; + } + } + + return exactMatches / searchLines.length; +} + +/** + * Fast fuzzy search using a two-pass approach: + * 1. Quick pre-filter pass: Count exact line matches (fast) + * 2. Detailed pass: Only compute Levenshtein on promising candidates (expensive) + * + * The key insight: If two blocks are similar enough for fuzzy matching (e.g., 90%), + * then likely at least 60% of their lines will match exactly. + */ +function fastFuzzySearch( + lines: string[], + searchChunk: string, + startIndex: number, + endIndex: number, +) { + const searchLines = searchChunk.split(/\r?\n/); + const searchLen = searchLines.length; + + // Track start time for timeout + const startTime = performance.now(); + + // Quick threshold: require at least 60% exact line matches to be a candidate + const QUICK_THRESHOLD = 0.6; + + // First pass: find candidates with high exact line match ratio (very fast) + const candidates: Array<{ index: number; quickScore: number }> = []; + + for (let i = startIndex; i <= endIndex - searchLen; i++) { + // Check time limit + const elapsed = performance.now() - startTime; + if (elapsed > MAX_FUZZY_SEARCH_TIME_MS) { + console.warn( + `Fast fuzzy search timed out during pre-filter after ${(elapsed / 1000).toFixed(1)}s`, + ); + break; + } + + const quickScore = quickScoreByExactLines(lines, searchLines, i); + + if (quickScore >= QUICK_THRESHOLD) { + candidates.push({ index: i, quickScore }); + } + } + + // Sort candidates by quick score (best first) + candidates.sort((a, b) => b.quickScore - a.quickScore); + + // Second pass: only compute expensive Levenshtein on top candidates + let bestScore = 0; + let bestMatchIndex = -1; + + const MAX_CANDIDATES_TO_CHECK = 10; // Only check top 10 candidates + + for ( + let i = 0; + i < Math.min(candidates.length, MAX_CANDIDATES_TO_CHECK); + i++ + ) { + const candidate = candidates[i]; + + // Check time limit + const elapsed = performance.now() - startTime; + if (elapsed > MAX_FUZZY_SEARCH_TIME_MS) { + console.warn( + `Fast fuzzy search timed out during detailed pass after ${(elapsed / 1000).toFixed(1)}s. Best match: ${(bestScore * 100).toFixed(1)}%`, + ); + break; + } + + const originalChunk = lines + .slice(candidate.index, candidate.index + searchLen) + .join("\n"); + + const similarity = getSimilarity(originalChunk, searchChunk); + + if (similarity > bestScore) { + bestScore = similarity; + bestMatchIndex = candidate.index; + + // Early exit if we found a very good match + if (bestScore >= EARLY_STOP_THRESHOLD) { + return { bestScore, bestMatchIndex }; + } + } + } + + return { bestScore, bestMatchIndex }; +} + export function applySearchReplace( originalContent: string, diffContent: string, @@ -113,14 +266,29 @@ export function applySearchReplace( }; } - if (candidates.length === 0) { + if (candidates.length === 1) { + matchIndex = candidates[0]; + } + } + + // If still no match, try fuzzy matching with Levenshtein distance + if (matchIndex === -1) { + const searchChunk = searchLines.join("\n"); + const { bestScore, bestMatchIndex } = fastFuzzySearch( + resultLines, + searchChunk, + 0, + resultLines.length, + ); + + if (bestScore >= FUZZY_MATCH_THRESHOLD) { + matchIndex = bestMatchIndex; + } else { return { success: false, - error: "Search block did not match any content in the target file", + error: `Search block did not match any content in the target file. Best fuzzy match had similarity of ${(bestScore * 100).toFixed(1)}% (threshold: ${(FUZZY_MATCH_THRESHOLD * 100).toFixed(1)}%)`, }; } - - matchIndex = candidates[0]; } const matchedLines = resultLines.slice( diff --git a/src/utils/text_normalization.ts b/src/utils/text_normalization.ts new file mode 100644 index 0000000..62f77b1 --- /dev/null +++ b/src/utils/text_normalization.ts @@ -0,0 +1,20 @@ +/** + * Normalizes text for comparison by handling smart quotes and other special characters + */ +export function normalizeString(text: string): string { + return ( + text + // Normalize smart quotes to regular quotes + .replace(/[\u2018\u2019]/g, "'") // Single quotes + .replace(/[\u201C\u201D]/g, '"') // Double quotes + // Normalize different types of dashes + .replace(/[\u2013\u2014]/g, "-") // En dash and em dash to hyphen + // Normalize ellipsis + .replace(/\u2026/g, "...") // Ellipsis to three dots + // Normalize non-breaking spaces + .replace(/\u00A0/g, " ") // Non-breaking space to regular space + // Normalize other common Unicode variants + .replace(/\u00AD/g, "") // Soft hyphen (remove) + .replace(/[\uFEFF]/g, "") + ); // Zero-width no-break space (remove) +}