fix: better binary file detection (#2025)

This commit is contained in:
Aiden Cline
2025-08-17 17:59:51 -05:00
committed by GitHub
parent de1764841c
commit ebd1b18b70

View File

@@ -53,7 +53,7 @@ export const ReadTool = Tool.define("read", {
const offset = params.offset || 0 const offset = params.offset || 0
const isImage = isImageFile(filepath) const isImage = isImageFile(filepath)
if (isImage) throw new Error(`This is an image file of type: ${isImage}\nUse a different tool to process images`) if (isImage) throw new Error(`This is an image file of type: ${isImage}\nUse a different tool to process images`)
const isBinary = await isBinaryFile(file) const isBinary = await isBinaryFile(filepath, file)
if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`) if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`)
const lines = await file.text().then((text) => text.split("\n")) const lines = await file.text().then((text) => text.split("\n"))
const raw = lines.slice(offset, offset + limit).map((line) => { const raw = lines.slice(offset, offset + limit).map((line) => {
@@ -105,13 +105,59 @@ function isImageFile(filePath: string): string | false {
} }
} }
async function isBinaryFile(file: Bun.BunFile): Promise<boolean> { async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
const buffer = await file.arrayBuffer() const ext = path.extname(filepath).toLowerCase()
const bytes = new Uint8Array(buffer.slice(0, 512)) // Check first 512 bytes // binary check for common non-text extensions
switch (ext) {
for (let i = 0; i < bytes.length; i++) { case ".zip":
if (bytes[i] === 0) return true // Null byte indicates binary case ".tar":
case ".gz":
case ".exe":
case ".dll":
case ".so":
case ".class":
case ".jar":
case ".war":
case ".7z":
case ".doc":
case ".docx":
case ".xls":
case ".xlsx":
case ".ppt":
case ".pptx":
case ".odt":
case ".ods":
case ".odp":
case ".bin":
case ".dat":
case ".obj":
case ".o":
case ".a":
case ".lib":
case ".wasm":
case ".pyc":
case ".pyo":
return true
default:
break
} }
return false const stat = await file.stat()
const fileSize = stat.size
if (fileSize === 0) return false
const bufferSize = Math.min(4096, fileSize)
const buffer = await file.arrayBuffer()
if (buffer.byteLength === 0) return false
const bytes = new Uint8Array(buffer.slice(0, bufferSize))
let nonPrintableCount = 0
for (let i = 0; i < bytes.length; i++) {
if (bytes[i] === 0) return true
if (bytes[i] < 9 || (bytes[i] > 13 && bytes[i] < 32)) {
nonPrintableCount++
}
}
// If >30% non-printable characters, consider it binary
return nonPrintableCount / bytes.length > 0.3
} }