diff --git a/internal/storage/qdrant.go b/internal/storage/qdrant.go index 6d77f9b..59c9d63 100644 --- a/internal/storage/qdrant.go +++ b/internal/storage/qdrant.go @@ -3,6 +3,7 @@ package storage import ( "context" "fmt" + "strings" "github.com/qdrant/go-client/qdrant" ) @@ -515,6 +516,53 @@ func (c *QdrantClient) DeleteByFilter(ctx context.Context, key, value string) er return nil } +// DeleteByPrefix deletes all vectors where the payload field `key` starts with `prefix`. +// Uses Scroll to find matching points (client-side prefix filter) then batch deletes them. +// Returns the number of deleted points. +func (c *QdrantClient) DeleteByPrefix(ctx context.Context, key, prefix string) (int, error) { + // Single-pass scroll: request all points with only the target payload field. + // Workspace collections typically have <10k points, so a single pass is fine. + // If pagination is needed in the future, the Scroll Offset field can be used. + results, err := c.client.Scroll(ctx, &qdrant.ScrollPoints{ + CollectionName: c.config.Collection, + Limit: qdrant.PtrOf(uint32(50000)), + WithPayload: qdrant.NewWithPayloadInclude(key), + }) + if err != nil { + return 0, fmt.Errorf("scroll for prefix %q failed: %w", prefix, err) + } + + var allMatchingIDs []*qdrant.PointId + for _, point := range results { + if val, ok := point.Payload[key]; ok { + if strings.HasPrefix(val.GetStringValue(), prefix) { + allMatchingIDs = append(allMatchingIDs, point.Id) + } + } + } + + if len(allMatchingIDs) == 0 { + return 0, nil + } + + // Batch delete all matching point IDs + _, err = c.client.Delete(ctx, &qdrant.DeletePoints{ + CollectionName: c.config.Collection, + Points: &qdrant.PointsSelector{ + PointsSelectorOneOf: &qdrant.PointsSelector_Points{ + Points: &qdrant.PointsIdsList{ + Ids: allMatchingIDs, + }, + }, + }, + }) + if err != nil { + return 0, fmt.Errorf("batch delete of %d points by prefix %q failed: %w", len(allMatchingIDs), prefix, err) + } + + return len(allMatchingIDs), nil +} + // Close closes the Qdrant client connection func (c *QdrantClient) Close() error { if c.client != nil { diff --git a/internal/workspace/manager.go b/internal/workspace/manager.go index f6acb64..9058cbc 100644 --- a/internal/workspace/manager.go +++ b/internal/workspace/manager.go @@ -634,13 +634,43 @@ func (m *Manager) IndexLanguage(ctx context.Context, info *Info, language string } // Check for deleted files (both code and docs) - // We scan the state and check if files still exist in current scan - // But scan only has current files. - // Better: iterate state.Files and check if they exist on disk. - state.mu.RLock() - for path := range state.Files { - if _, err := os.Stat(path); os.IsNotExist(err) { - // It's deleted. Determine if it was code or doc based on extension + // Use directory-aware cleanup: detect the highest deleted directory and + // bulk-remove all vectors under it rather than deleting one by one. + staleFiles := CollectStaleFiles(state) + + if len(staleFiles) > 0 { + dirPrefixes, individualStale := GroupByDeletedRoot(staleFiles, info.Root) + + // --- Bulk delete for entire deleted directories --- + for prefix, files := range dirPrefixes { + log.Printf("๐Ÿ” Detected deleted directory: %s (%d tracked files)", prefix, len(files)) + + deleted, delErr := collectionClient.DeleteByPrefix(ctx, "file", prefix) + if delErr != nil { + log.Printf("โš ๏ธ Prefix delete failed for %s: %v โ€” falling back to per-file delete", prefix, delErr) + // Fallback: delete individually + for _, path := range files { + if err := ltm.DeleteByMetadata(ctx, "file", path); err != nil { + log.Printf("โš ๏ธ Failed to delete chunks for %s: %v", path, err) + } + } + } else if deleted > 0 { + log.Printf("๐Ÿงน Bulk-deleted %d vectors under %s", deleted, prefix) + } + + // Also remove any other state entries that match this prefix + // (files we haven't seen in staleFiles because they weren't in the iteration) + state.mu.Lock() + for path := range state.Files { + if strings.HasPrefix(path, prefix) { + delete(state.Files, path) + } + } + state.mu.Unlock() + } + + // --- Individual file deletions (parent dir still exists) --- + for _, path := range individualStale { ext := strings.ToLower(filepath.Ext(path)) if ext == ".md" { docsToDelete = append(docsToDelete, path) @@ -649,7 +679,6 @@ func (m *Manager) IndexLanguage(ctx context.Context, info *Info, language string } } } - state.mu.RUnlock() // Process deletions (Code) if len(filesToDelete) > 0 { diff --git a/internal/workspace/stale.go b/internal/workspace/stale.go new file mode 100644 index 0000000..c4a3177 --- /dev/null +++ b/internal/workspace/stale.go @@ -0,0 +1,86 @@ +package workspace + +import ( + "os" + "path/filepath" + "strings" +) + +// FindDeletedRoot walks up from filePath to find the highest deleted directory. +// If only the file itself is missing but its parent directory exists, returns filePath. +// If a parent directory is also missing, walks further up to find the topmost deleted ancestor +// below workspaceRoot. +// +// Example: +// +// Given: filePath="/project/tmp/vendor/src/foo.php", workspaceRoot="/project" +// If /project/tmp/ was deleted entirely: +// Returns "/project/tmp" (the highest missing ancestor) +func FindDeletedRoot(filePath, workspaceRoot string) string { + highestDeleted := filePath + dir := filepath.Dir(filePath) + wsClean := filepath.Clean(workspaceRoot) + + for { + dirClean := filepath.Clean(dir) + // Stop if we've reached the workspace root or filesystem root + if dirClean == wsClean || dirClean == "/" || dirClean == "." { + break + } + + if _, err := os.Stat(dir); os.IsNotExist(err) { + // This directory is also deleted โ€” record it and keep going up + highestDeleted = dir + dir = filepath.Dir(dir) + continue + } + // Directory exists โ€” stop here + break + } + + return filepath.Clean(highestDeleted) +} + +// IsDirectoryDeletion returns true if the deleted root is a directory path +// (i.e., a parent directory was deleted, not just the individual file). +func IsDirectoryDeletion(deletedRoot, originalFile string) bool { + return filepath.Clean(deletedRoot) != filepath.Clean(originalFile) +} + +// CollectStaleFiles iterates WorkspaceState.Files and returns paths +// that no longer exist on disk. +func CollectStaleFiles(state *WorkspaceState) []string { + state.mu.RLock() + defer state.mu.RUnlock() + + var stale []string + for path := range state.Files { + if _, err := os.Stat(path); os.IsNotExist(err) { + stale = append(stale, path) + } + } + return stale +} + +// GroupByDeletedRoot groups stale file paths by their highest deleted ancestor directory. +// Returns two maps: +// - dirPrefixes: map[prefix][]filePath โ€” files under a deleted directory +// - individualFiles: files whose parent directory still exists +func GroupByDeletedRoot(staleFiles []string, workspaceRoot string) (dirPrefixes map[string][]string, individualFiles []string) { + dirPrefixes = make(map[string][]string) + + for _, f := range staleFiles { + root := FindDeletedRoot(f, workspaceRoot) + if IsDirectoryDeletion(root, f) { + prefix := root + if !strings.HasSuffix(prefix, string(os.PathSeparator)) { + prefix += string(os.PathSeparator) + } + dirPrefixes[prefix] = append(dirPrefixes[prefix], f) + } else { + individualFiles = append(individualFiles, f) + } + } + + return dirPrefixes, individualFiles +} diff --git a/internal/workspace/stale_test.go b/internal/workspace/stale_test.go new file mode 100644 index 0000000..4a49ba7 --- /dev/null +++ b/internal/workspace/stale_test.go @@ -0,0 +1,151 @@ +package workspace + +import ( + "os" + "path/filepath" + "sort" + "testing" +) + +func TestFindDeletedRoot_FileOnlyDeleted(t *testing.T) { + // Setup: create workspace/dir/subdir/ but NOT the file inside it + wsRoot := t.TempDir() + subdir := filepath.Join(wsRoot, "src", "pkg") + if err := os.MkdirAll(subdir, 0755); err != nil { + t.Fatal(err) + } + + // File that doesn't exist, but parent directory does + missingFile := filepath.Join(subdir, "deleted.go") + + result := FindDeletedRoot(missingFile, wsRoot) + if result != missingFile { + t.Errorf("expected %q, got %q โ€” when only the file is missing, should return the file path itself", missingFile, result) + } +} + +func TestFindDeletedRoot_DirectoryDeleted(t *testing.T) { + // Setup: workspace root exists, but "tmp" directory does NOT + wsRoot := t.TempDir() + + // Neither tmp/ nor its children exist + missingFile := filepath.Join(wsRoot, "tmp", "vendor", "src", "foo.php") + + result := FindDeletedRoot(missingFile, wsRoot) + expected := filepath.Join(wsRoot, "tmp") + if result != expected { + t.Errorf("expected %q, got %q โ€” should return the highest deleted directory", expected, result) + } +} + +func TestFindDeletedRoot_IntermediateDirectoryDeleted(t *testing.T) { + // Setup: workspace/src/ exists, but workspace/src/old/ does NOT + wsRoot := t.TempDir() + srcDir := filepath.Join(wsRoot, "src") + if err := os.MkdirAll(srcDir, 0755); err != nil { + t.Fatal(err) + } + + // src/ exists, but src/old/ and deeper don't + missingFile := filepath.Join(wsRoot, "src", "old", "legacy", "file.go") + + result := FindDeletedRoot(missingFile, wsRoot) + expected := filepath.Join(wsRoot, "src", "old") + if result != expected { + t.Errorf("expected %q, got %q โ€” should return the highest deleted directory below existing parent", expected, result) + } +} + +func TestFindDeletedRoot_WorkspaceRootBoundary(t *testing.T) { + // Ensure we never go above workspace root + wsRoot := t.TempDir() + + // Everything under workspace root is deleted + missingFile := filepath.Join(wsRoot, "a", "b", "c", "d.txt") + + result := FindDeletedRoot(missingFile, wsRoot) + expected := filepath.Join(wsRoot, "a") + if result != expected { + t.Errorf("expected %q, got %q โ€” should stop at direct child of workspace root", expected, result) + } +} + +func TestIsDirectoryDeletion(t *testing.T) { + tests := []struct { + deletedRoot string + originalFile string + expected bool + }{ + {"/project/tmp", "/project/tmp/foo.php", true}, + {"/project/foo.go", "/project/foo.go", false}, + {"/project/src/old", "/project/src/old/deep/file.go", true}, + } + + for _, tt := range tests { + result := IsDirectoryDeletion(tt.deletedRoot, tt.originalFile) + if result != tt.expected { + t.Errorf("IsDirectoryDeletion(%q, %q) = %v, want %v", + tt.deletedRoot, tt.originalFile, result, tt.expected) + } + } +} + +func TestGroupByDeletedRoot(t *testing.T) { + wsRoot := t.TempDir() + + // Create src/ (exists) but not tmp/ (deleted) + srcDir := filepath.Join(wsRoot, "src") + if err := os.MkdirAll(srcDir, 0755); err != nil { + t.Fatal(err) + } + + staleFiles := []string{ + filepath.Join(wsRoot, "tmp", "a", "file1.go"), + filepath.Join(wsRoot, "tmp", "b", "file2.go"), + filepath.Join(wsRoot, "src", "deleted.go"), // parent exists, only file missing + } + + dirPrefixes, individualFiles := GroupByDeletedRoot(staleFiles, wsRoot) + + // tmp/ is the deleted root for both tmp files + expectedPrefix := filepath.Join(wsRoot, "tmp") + string(os.PathSeparator) + if _, ok := dirPrefixes[expectedPrefix]; !ok { + t.Errorf("expected prefix %q in dirPrefixes, got: %v", expectedPrefix, dirPrefixes) + } + + if len(dirPrefixes[expectedPrefix]) != 2 { + t.Errorf("expected 2 files under prefix %q, got %d", expectedPrefix, len(dirPrefixes[expectedPrefix])) + } + + sort.Strings(individualFiles) + expectedIndividual := filepath.Join(wsRoot, "src", "deleted.go") + if len(individualFiles) != 1 || individualFiles[0] != expectedIndividual { + t.Errorf("expected individualFiles = [%q], got %v", expectedIndividual, individualFiles) + } +} + +func TestCollectStaleFiles(t *testing.T) { + wsRoot := t.TempDir() + + // Create a real file + existingFile := filepath.Join(wsRoot, "exists.go") + if err := os.WriteFile(existingFile, []byte("package main"), 0644); err != nil { + t.Fatal(err) + } + + // Create state with both existing and non-existing files + state := NewWorkspaceState() + + realInfo, _ := os.Stat(existingFile) + state.UpdateFile(existingFile, realInfo) + state.Files[filepath.Join(wsRoot, "deleted.go")] = FileState{} + + stale := CollectStaleFiles(state) + + if len(stale) != 1 { + t.Fatalf("expected 1 stale file, got %d: %v", len(stale), stale) + } + if stale[0] != filepath.Join(wsRoot, "deleted.go") { + t.Errorf("expected stale file %q, got %q", filepath.Join(wsRoot, "deleted.go"), stale[0]) + } +}