fix: edit tool (#1287)

This commit is contained in:
Filip
2025-07-24 22:18:04 +02:00
committed by GitHub
parent 72e464ac3e
commit 541a7a39d3
2 changed files with 231 additions and 34 deletions

View File

@@ -1,7 +1,7 @@
// the approaches in this edit tool are sourced from // the approaches in this edit tool are sourced from
// https://github.com/cline/cline/blob/main/evals/diff-edits/diff-apply/diff-06-23-25.ts // https://github.com/cline/cline/blob/main/evals/diff-edits/diff-apply/diff-06-23-25.ts
// https://github.com/google-gemini/gemini-cli/blob/main/packages/core/src/utils/editCorrector.ts // https://github.com/google-gemini/gemini-cli/blob/main/packages/core/src/utils/editCorrector.ts
// https://github.com/cline/cline/blob/main/evals/diff-edits/diff-apply/diff-06-26-25.ts
import { z } from "zod" import { z } from "zod"
import * as path from "path" import * as path from "path"
import { Tool } from "./tool" import { Tool } from "./tool"
@@ -105,6 +105,31 @@ export const EditTool = Tool.define({
export type Replacer = (content: string, find: string) => Generator<string, void, unknown> export type Replacer = (content: string, find: string) => Generator<string, void, unknown>
// Similarity thresholds for block anchor fallback matching
const SINGLE_CANDIDATE_SIMILARITY_THRESHOLD = 0.0
const MULTIPLE_CANDIDATES_SIMILARITY_THRESHOLD = 0.3
/**
* Levenshtein distance algorithm implementation
*/
function levenshtein(a: string, b: string): number {
// Handle empty strings
if (a === "" || b === "") {
return Math.max(a.length, b.length)
}
const matrix = Array.from({ length: a.length + 1 }, (_, i) =>
Array.from({ length: b.length + 1 }, (_, j) => (i === 0 ? j : j === 0 ? i : 0)),
)
for (let i = 1; i <= a.length; i++) {
for (let j = 1; j <= b.length; j++) {
const cost = a[i - 1] === b[j - 1] ? 0 : 1
matrix[i][j] = Math.min(matrix[i - 1][j] + 1, matrix[i][j - 1] + 1, matrix[i - 1][j - 1] + cost)
}
}
return matrix[a.length][b.length]
}
export const SimpleReplacer: Replacer = function* (_content, find) { export const SimpleReplacer: Replacer = function* (_content, find) {
yield find yield find
} }
@@ -160,8 +185,10 @@ export const BlockAnchorReplacer: Replacer = function* (content, find) {
const firstLineSearch = searchLines[0].trim() const firstLineSearch = searchLines[0].trim()
const lastLineSearch = searchLines[searchLines.length - 1].trim() const lastLineSearch = searchLines[searchLines.length - 1].trim()
const searchBlockSize = searchLines.length
// Find blocks where first line matches the search first line // Collect all candidate positions where both anchors match
const candidates: Array<{ startLine: number; endLine: number }> = []
for (let i = 0; i < originalLines.length; i++) { for (let i = 0; i < originalLines.length; i++) {
if (originalLines[i].trim() !== firstLineSearch) { if (originalLines[i].trim() !== firstLineSearch) {
continue continue
@@ -170,25 +197,113 @@ export const BlockAnchorReplacer: Replacer = function* (content, find) {
// Look for the matching last line after this first line // Look for the matching last line after this first line
for (let j = i + 2; j < originalLines.length; j++) { for (let j = i + 2; j < originalLines.length; j++) {
if (originalLines[j].trim() === lastLineSearch) { if (originalLines[j].trim() === lastLineSearch) {
// Found a potential block from i to j candidates.push({ startLine: i, endLine: j })
let matchStartIndex = 0
for (let k = 0; k < i; k++) {
matchStartIndex += originalLines[k].length + 1
}
let matchEndIndex = matchStartIndex
for (let k = 0; k <= j - i; k++) {
matchEndIndex += originalLines[i + k].length
if (k < j - i) {
matchEndIndex += 1 // Add newline character except for the last line
}
}
yield content.substring(matchStartIndex, matchEndIndex)
break // Only match the first occurrence of the last line break // Only match the first occurrence of the last line
} }
} }
} }
// Return immediately if no candidates
if (candidates.length === 0) {
return
}
// Handle single candidate scenario (using relaxed threshold)
if (candidates.length === 1) {
const { startLine, endLine } = candidates[0]
const actualBlockSize = endLine - startLine + 1
let similarity = 0
let linesToCheck = Math.min(searchBlockSize - 2, actualBlockSize - 2) // Middle lines only
if (linesToCheck > 0) {
for (let j = 1; j < searchBlockSize - 1 && j < actualBlockSize - 1; j++) {
const originalLine = originalLines[startLine + j].trim()
const searchLine = searchLines[j].trim()
const maxLen = Math.max(originalLine.length, searchLine.length)
if (maxLen === 0) {
continue
}
const distance = levenshtein(originalLine, searchLine)
similarity += (1 - distance / maxLen) / linesToCheck
// Exit early when threshold is reached
if (similarity >= SINGLE_CANDIDATE_SIMILARITY_THRESHOLD) {
break
}
}
} else {
// No middle lines to compare, just accept based on anchors
similarity = 1.0
}
if (similarity >= SINGLE_CANDIDATE_SIMILARITY_THRESHOLD) {
let matchStartIndex = 0
for (let k = 0; k < startLine; k++) {
matchStartIndex += originalLines[k].length + 1
}
let matchEndIndex = matchStartIndex
for (let k = startLine; k <= endLine; k++) {
matchEndIndex += originalLines[k].length
if (k < endLine) {
matchEndIndex += 1 // Add newline character except for the last line
}
}
yield content.substring(matchStartIndex, matchEndIndex)
}
return
}
// Calculate similarity for multiple candidates
let bestMatch: { startLine: number; endLine: number } | null = null
let maxSimilarity = -1
for (const candidate of candidates) {
const { startLine, endLine } = candidate
const actualBlockSize = endLine - startLine + 1
let similarity = 0
let linesToCheck = Math.min(searchBlockSize - 2, actualBlockSize - 2) // Middle lines only
if (linesToCheck > 0) {
for (let j = 1; j < searchBlockSize - 1 && j < actualBlockSize - 1; j++) {
const originalLine = originalLines[startLine + j].trim()
const searchLine = searchLines[j].trim()
const maxLen = Math.max(originalLine.length, searchLine.length)
if (maxLen === 0) {
continue
}
const distance = levenshtein(originalLine, searchLine)
similarity += 1 - distance / maxLen
}
similarity /= linesToCheck // Average similarity
} else {
// No middle lines to compare, just accept based on anchors
similarity = 1.0
}
if (similarity > maxSimilarity) {
maxSimilarity = similarity
bestMatch = candidate
}
}
// Threshold judgment
if (maxSimilarity >= MULTIPLE_CANDIDATES_SIMILARITY_THRESHOLD && bestMatch) {
const { startLine, endLine } = bestMatch
let matchStartIndex = 0
for (let k = 0; k < startLine; k++) {
matchStartIndex += originalLines[k].length + 1
}
let matchEndIndex = matchStartIndex
for (let k = startLine; k <= endLine; k++) {
matchEndIndex += originalLines[k].length
if (k < endLine) {
matchEndIndex += 1
}
}
yield content.substring(matchStartIndex, matchEndIndex)
}
} }
export const WhitespaceNormalizedReplacer: Replacer = function* (content, find) { export const WhitespaceNormalizedReplacer: Replacer = function* (content, find) {
@@ -201,9 +316,8 @@ export const WhitespaceNormalizedReplacer: Replacer = function* (content, find)
const line = lines[i] const line = lines[i]
if (normalizeWhitespace(line) === normalizedFind) { if (normalizeWhitespace(line) === normalizedFind) {
yield line yield line
} } else {
// Only check for substring matches if the full line doesn't match
// Also check for substring matches within lines
const normalizedLine = normalizeWhitespace(line) const normalizedLine = normalizeWhitespace(line)
if (normalizedLine.includes(normalizedFind)) { if (normalizedLine.includes(normalizedFind)) {
// Find the actual substring in the original line that matches // Find the actual substring in the original line that matches
@@ -222,6 +336,7 @@ export const WhitespaceNormalizedReplacer: Replacer = function* (content, find)
} }
} }
} }
}
// Handle multi-line matches // Handle multi-line matches
const findLines = find.split("\n") const findLines = find.split("\n")
@@ -457,7 +572,7 @@ export function replace(content: string, oldString: string, newString: string, r
BlockAnchorReplacer, BlockAnchorReplacer,
WhitespaceNormalizedReplacer, WhitespaceNormalizedReplacer,
IndentationFlexibleReplacer, IndentationFlexibleReplacer,
// EscapeNormalizedReplacer, EscapeNormalizedReplacer,
// TrimmedBoundaryReplacer, // TrimmedBoundaryReplacer,
// ContextAwareReplacer, // ContextAwareReplacer,
// MultiOccurrenceReplacer, // MultiOccurrenceReplacer,

View File

@@ -326,6 +326,88 @@ const testCases: TestCase[] = [
find: "const msg = `Hello\\tWorld`;", find: "const msg = `Hello\\tWorld`;",
replace: "const msg = `Hi\\tWorld`;", replace: "const msg = `Hi\\tWorld`;",
}, },
// Test case that reproduces the greedy matching bug - now should fail due to low similarity
{
content: [
"func main() {",
" if condition {",
" doSomething()",
" }",
" processData()",
" if anotherCondition {",
" doOtherThing()",
" }",
" return mainLayout",
"}",
"",
"func helper() {",
" }",
" return mainLayout", // This should NOT be matched due to low similarity
"}",
].join("\n"),
find: [" }", " return mainLayout"].join("\n"),
replace: [" }", " // Add some code here", " return mainLayout"].join("\n"),
fail: true, // This should fail because the pattern has low similarity score
},
// Test case for the fix - more specific pattern should work
{
content: [
"function renderLayout() {",
" const header = createHeader()",
" const body = createBody()",
" return mainLayout",
"}",
].join("\n"),
find: ["function renderLayout() {", " // different content", " return mainLayout", "}"].join("\n"),
replace: [
"function renderLayout() {",
" const header = createHeader()",
" const body = createBody()",
" // Add minimap overlay",
" return mainLayout",
"}",
].join("\n"),
},
// Test that large blocks without arbitrary size limits can work
{
content: Array.from({ length: 100 }, (_, i) => `line ${i}`).join("\n"),
find: Array.from({ length: 50 }, (_, i) => `line ${i + 25}`).join("\n"),
replace: Array.from({ length: 50 }, (_, i) => `updated line ${i + 25}`).join("\n"),
},
// Test case for the fix - more specific pattern should work
{
content: [
"function renderLayout() {",
" const header = createHeader()",
" const body = createBody()",
" return mainLayout",
"}",
].join("\n"),
find: ["function renderLayout() {", " // different content", " return mainLayout", "}"].join("\n"),
replace: [
"function renderLayout() {",
" const header = createHeader()",
" const body = createBody()",
" // Add minimap overlay",
" return mainLayout",
"}",
].join("\n"),
},
// Test BlockAnchorReplacer with overly large blocks (should fail)
{
content:
Array.from({ length: 100 }, (_, i) => `line ${i}`).join("\n") +
"\nfunction test() {\n" +
Array.from({ length: 60 }, (_, i) => ` content ${i}`).join("\n") +
"\n return result\n}",
find: ["function test() {", " // different content", " return result", "}"].join("\n"),
replace: ["function test() {", " return 42", "}"].join("\n"),
},
] ]
describe("EditTool Replacers", () => { describe("EditTool Replacers", () => {