Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions pkg/tools/builtin/filesystem/filesystem.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package filesystem

import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
Expand All @@ -9,6 +10,7 @@ import (
"io"
"io/fs"
"log/slog"
"net/http"
"os"
"path/filepath"
"regexp"
Expand Down Expand Up @@ -1181,6 +1183,11 @@ func (t *ToolSet) handleSearchFilesContent(_ context.Context, args SearchFilesCo
return nil
}

binary, err := t.isBinaryFile(path)
if err != nil || binary {
return nil
}

content, err := t.readFile(path)
if err != nil {
return nil
Expand Down Expand Up @@ -1265,6 +1272,53 @@ func (t *ToolSet) handleSearchFilesContent(_ context.Context, args SearchFilesCo
}, nil
}

func (t *ToolSet) readFileHeader(resolved string, size int) ([]byte, error) {
if size <= 0 {
return nil, nil
}

root, rel, err := t.rootedAccess(resolved)
if err != nil {
return nil, err
}

var file *os.File
if root != nil {
file, err = root.Open(rel)
} else {
file, err = os.Open(resolved)
}
if err != nil {
return nil, err
}
defer file.Close()

buf := make([]byte, size)
n, err := io.ReadFull(file, buf)
if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
return nil, err
}
return buf[:n], nil
}

func (t *ToolSet) isBinaryFile(resolved string) (bool, error) {
header, err := t.readFileHeader(resolved, maxBinarySniffBytes)
if err != nil {
return false, err
}
return isBinaryContent(header), nil
}

func isBinaryContent(header []byte) bool {
if len(header) == 0 {
return false
}
if bytes.IndexByte(header, 0) >= 0 {
return true
}
return !strings.HasPrefix(http.DetectContentType(header), "text/")
}

func (t *ToolSet) handleWriteFile(ctx context.Context, args WriteFileArgs) (*tools.ToolCallResult, error) {
resolvedPath, err := t.resolveAndCheckPath(args.Path)
if err != nil {
Expand Down
46 changes: 46 additions & 0 deletions pkg/tools/builtin/filesystem/filesystem_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,52 @@ func TestFilesystemTool_SearchFilesContent_SkipsSymlinkToLargeFile(t *testing.T)
assert.NotContains(t, result.Output, "target.bin")
}

// TestFilesystemTool_SearchFilesContent_SkipsBinaryFiles verifies that
// small binary files are skipped even when they contain the query bytes.
func TestFilesystemTool_SearchFilesContent_SkipsBinaryFiles(t *testing.T) {
t.Parallel()
tmpDir := t.TempDir()
tool := New(tmpDir)

require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "text.txt"), []byte("findme\n"), 0o644))
require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "program.bin"), []byte{0x7f, 'E', 'L', 'F', 0, 'f', 'i', 'n', 'd', 'm', 'e', '\n'}, 0o644))
require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "doc.pdf"), []byte("%PDF-1.7\nfindme\n"), 0o644))

result, err := tool.handleSearchFilesContent(t.Context(), SearchFilesContentArgs{
Path: ".",
Query: "findme",
})
require.NoError(t, err)
assert.Contains(t, result.Output, "text.txt")
assert.NotContains(t, result.Output, "program.bin")
assert.NotContains(t, result.Output, "doc.pdf")
assert.Equal(t, SearchFilesContentMeta{MatchCount: 1, FileCount: 1}, result.Meta)
}

func TestIsBinaryContent(t *testing.T) {
t.Parallel()

tests := []struct {
name string
header []byte
want bool
}{
{name: "empty", header: nil, want: false},
{name: "plain text", header: []byte("package main\nfunc main() {}\n"), want: false},
{name: "utf8 text", header: []byte("Résumé\n"), want: false},
{name: "elf", header: []byte{0x7f, 'E', 'L', 'F', 0, 1, 1, 0}, want: true},
{name: "png", header: []byte{0x89, 'P', 'N', 'G', '\r', '\n', 0x1a, '\n'}, want: true},
{name: "pdf", header: []byte("%PDF-1.7\nfindme\n"), want: true},
}

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
assert.Equal(t, tc.want, isBinaryContent(tc.header))
})
}
}

func TestFilesystemTool_PostEditCommands(t *testing.T) {
t.Parallel()
tmpDir := t.TempDir()
Expand Down
4 changes: 4 additions & 0 deletions pkg/tools/builtin/filesystem/limit.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ const maxFiles = 100
// Skipping them keeps a recursive search over a large tree bounded.
const maxSearchFileSize = 10 << 20 // 10 MiB

// maxBinarySniffBytes is how much of a file header search_files_content
// reads to decide whether the file is binary before loading the rest.
const maxBinarySniffBytes = 512

// maxSearchOutputBytes caps the total size of the joined match output.
// Without it a broad search (e.g. across the home directory) produces a
// multi-megabyte result that is then copied into the in-memory message
Expand Down
Loading