From 5896ae4fc5389628439d9bd92e2e7d2cd7c57bb4 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 11 Dec 2025 15:14:27 +0000 Subject: [PATCH 01/18] Fs cp now upload in parallel --- NEXT_CHANGELOG.md | 3 ++ cmd/fs/cp.go | 81 +++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 70 insertions(+), 14 deletions(-) diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 7f34ce2a5e..7a9476e5a1 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -6,6 +6,9 @@ ### CLI +* Improve performance of `databricks fs cp` command by parallelizing file uploads when + copying directories with the `--recursive` flag. + ### Bundles ### Dependency updates diff --git a/cmd/fs/cp.go b/cmd/fs/cp.go index 275620d7c3..31b21031f7 100644 --- a/cmd/fs/cp.go +++ b/cmd/fs/cp.go @@ -9,13 +9,19 @@ import ( "path" "path/filepath" "strings" + "sync" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/filer" "github.com/spf13/cobra" + "golang.org/x/sync/errgroup" ) +// Maximum number of concurrent file copy operations. The number was set +// conservatively and might be adjusted in the future. +const maxWorkers = 16 + type copy struct { overwrite bool recursive bool @@ -25,40 +31,83 @@ type copy struct { targetFiler filer.Filer sourceScheme string targetScheme string + + mu sync.Mutex // protect output from concurrent writes +} + +type copyTask struct { + sourcePath string + targetPath string } -func (c *copy) cpWriteCallback(sourceDir, targetDir string) fs.WalkDirFunc { +// cpDirToDir recursively copies the contents of a directory to another +// directory. +// +// There is no guarantee on the order in which the files are copied. +// +// The method does not take care of retrying on error; this is considered to +// be the responsibility of the Filer implementation. If a file copy fails, +// the error is returned and the other copies are cancelled. +func (c *copy) cpDirToDir(sourceDir, targetDir string) error { + if !c.recursive { + return fmt.Errorf("source path %s is a directory. Please specify the --recursive flag", sourceDir) + } + + // Walk the source directory and collect all files to copy. + var tasks []copyTask + sourceFs := filer.NewFS(c.ctx, c.sourceFiler) + err := fs.WalkDir(sourceFs, sourceDir, c.cpCollectCallback(sourceDir, targetDir, &tasks)) + if err != nil { + return err + } + + if len(tasks) == 0 { + return nil // no file to copy + } + + // Process each file copy in parallel using a fixed number of workers. + g, ctx := errgroup.WithContext(c.ctx) + g.SetLimit(min(maxWorkers, len(tasks))) + for _, task := range tasks { + g.Go(func() error { + if ctx.Err() != nil { + return ctx.Err() + } + return c.cpFileToFile(task.sourcePath, task.targetPath) + }) + } + + return g.Wait() +} + +func (c *copy) cpCollectCallback(sourceDir, targetDir string, tasks *[]copyTask) fs.WalkDirFunc { return func(sourcePath string, d fs.DirEntry, err error) error { if err != nil { return err } - // Compute path relative to the target directory + // Compute path relative to the source directory. relPath, err := filepath.Rel(sourceDir, sourcePath) if err != nil { return err } relPath = filepath.ToSlash(relPath) - // Compute target path for the file + // Compute target path for the file. targetPath := path.Join(targetDir, relPath) - // create directory and return early + // Create directory and return early. if d.IsDir() { return c.targetFiler.Mkdir(c.ctx, targetPath) } - return c.cpFileToFile(sourcePath, targetPath) - } -} - -func (c *copy) cpDirToDir(sourceDir, targetDir string) error { - if !c.recursive { - return fmt.Errorf("source path %s is a directory. Please specify the --recursive flag", sourceDir) + // Collect file copy tasks. + *tasks = append(*tasks, copyTask{ + sourcePath: sourcePath, + targetPath: targetPath, + }) + return nil } - - sourceFs := filer.NewFS(c.ctx, c.sourceFiler) - return fs.WalkDir(sourceFs, sourceDir, c.cpWriteCallback(sourceDir, targetDir)) } func (c *copy) cpFileToDir(sourcePath, targetDir string) error { @@ -109,6 +158,8 @@ func (c *copy) emitFileSkippedEvent(sourcePath, targetPath string) error { event := newFileSkippedEvent(fullSourcePath, fullTargetPath) template := "{{.SourcePath}} -> {{.TargetPath}} (skipped; already exists)\n" + c.mu.Lock() + defer c.mu.Unlock() return cmdio.RenderWithTemplate(c.ctx, event, "", template) } @@ -125,6 +176,8 @@ func (c *copy) emitFileCopiedEvent(sourcePath, targetPath string) error { event := newFileCopiedEvent(fullSourcePath, fullTargetPath) template := "{{.SourcePath}} -> {{.TargetPath}}\n" + c.mu.Lock() + defer c.mu.Unlock() return cmdio.RenderWithTemplate(c.ctx, event, "", template) } From b9333981ca5727817ceb3e31138c4233562c00f7 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 11 Dec 2025 21:00:22 +0000 Subject: [PATCH 02/18] Improve performance --- cmd/fs/cp.go | 76 ++++++++++++++++++++--------------------------- cmd/fs/cp_test.go | 27 +++++++++++++++++ 2 files changed, 59 insertions(+), 44 deletions(-) create mode 100644 cmd/fs/cp_test.go diff --git a/cmd/fs/cp.go b/cmd/fs/cp.go index 31b21031f7..a80ce53c74 100644 --- a/cmd/fs/cp.go +++ b/cmd/fs/cp.go @@ -18,13 +18,13 @@ import ( "golang.org/x/sync/errgroup" ) -// Maximum number of concurrent file copy operations. The number was set -// conservatively and might be adjusted in the future. -const maxWorkers = 16 +// Default number of concurrent file copy operations. +const defaultConcurrency = 16 type copy struct { - overwrite bool - recursive bool + overwrite bool + recursive bool + concurrency int ctx context.Context sourceFiler filer.Filer @@ -35,11 +35,6 @@ type copy struct { mu sync.Mutex // protect output from concurrent writes } -type copyTask struct { - sourcePath string - targetPath string -} - // cpDirToDir recursively copies the contents of a directory to another // directory. // @@ -53,35 +48,13 @@ func (c *copy) cpDirToDir(sourceDir, targetDir string) error { return fmt.Errorf("source path %s is a directory. Please specify the --recursive flag", sourceDir) } - // Walk the source directory and collect all files to copy. - var tasks []copyTask - sourceFs := filer.NewFS(c.ctx, c.sourceFiler) - err := fs.WalkDir(sourceFs, sourceDir, c.cpCollectCallback(sourceDir, targetDir, &tasks)) - if err != nil { - return err - } - - if len(tasks) == 0 { - return nil // no file to copy - } - - // Process each file copy in parallel using a fixed number of workers. + // Pool of workers to process copy operations in parallel. g, ctx := errgroup.WithContext(c.ctx) - g.SetLimit(min(maxWorkers, len(tasks))) - for _, task := range tasks { - g.Go(func() error { - if ctx.Err() != nil { - return ctx.Err() - } - return c.cpFileToFile(task.sourcePath, task.targetPath) - }) - } + g.SetLimit(c.concurrency) - return g.Wait() -} - -func (c *copy) cpCollectCallback(sourceDir, targetDir string, tasks *[]copyTask) fs.WalkDirFunc { - return func(sourcePath string, d fs.DirEntry, err error) error { + // Walk the source directory, queueing copy operations for processing. + sourceFs := filer.NewFS(c.ctx, c.sourceFiler) + err := fs.WalkDir(sourceFs, sourceDir, func(sourcePath string, d fs.DirEntry, err error) error { if err != nil { return err } @@ -96,18 +69,26 @@ func (c *copy) cpCollectCallback(sourceDir, targetDir string, tasks *[]copyTask) // Compute target path for the file. targetPath := path.Join(targetDir, relPath) - // Create directory and return early. + // Create the directory synchronously. This must happen before files + // are copied into it, and WalkDir guarantees directories are visited + // before their contents. if d.IsDir() { return c.targetFiler.Mkdir(c.ctx, targetPath) } - // Collect file copy tasks. - *tasks = append(*tasks, copyTask{ - sourcePath: sourcePath, - targetPath: targetPath, + // Queue file copy operation for processing. + g.Go(func() error { + if ctx.Err() != nil { + return ctx.Err() + } + return c.cpFileToFile(sourcePath, targetPath) }) return nil + }) + if err != nil { + return err } + return g.Wait() } func (c *copy) cpFileToDir(sourcePath, targetDir string) error { @@ -206,13 +187,20 @@ func newCpCommand() *cobra.Command { When copying a file, if TARGET_PATH is a directory, the file will be created inside the directory, otherwise the file is created at TARGET_PATH. `, - Args: root.ExactArgs(2), - PreRunE: root.MustWorkspaceClient, + Args: root.ExactArgs(2), } var c copy cmd.Flags().BoolVar(&c.overwrite, "overwrite", false, "overwrite existing files") cmd.Flags().BoolVarP(&c.recursive, "recursive", "r", false, "recursively copy files from directory") + cmd.Flags().IntVar(&c.concurrency, "concurrency", defaultConcurrency, "number of parallel copy operations") + + cmd.PreRunE = func(cmd *cobra.Command, args []string) error { + if c.concurrency <= 0 { + return fmt.Errorf("--concurrency must be at least 1") + } + return root.MustWorkspaceClient(cmd, args) + } cmd.RunE = func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() diff --git a/cmd/fs/cp_test.go b/cmd/fs/cp_test.go new file mode 100644 index 0000000000..87c1170b7e --- /dev/null +++ b/cmd/fs/cp_test.go @@ -0,0 +1,27 @@ +package fs + +import ( + "context" + "strings" + "testing" +) + +func TestCpConcurrencyValidation(t *testing.T) { + ctx := context.Background() + cmd := newCpCommand() + cmd.SetContext(ctx) + + // Test concurrency = 0 + cmd.SetArgs([]string{"src", "dst", "--concurrency", "0"}) + err := cmd.Execute() + if err == nil || !strings.Contains(err.Error(), "--concurrency must be at least 1") { + t.Errorf("expected error containing '--concurrency must be at least 1', got %v", err) + } + + // Test concurrency = -1 + cmd.SetArgs([]string{"src", "dst", "--concurrency", "-1"}) + err = cmd.Execute() + if err == nil || !strings.Contains(err.Error(), "--concurrency must be at least 1") { + t.Errorf("expected error containing '--concurrency must be at least 1', got %v", err) + } +} From 2fa03e1965a3211b9de4a64a8d9e5200d0e7dc36 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 11 Dec 2025 21:11:29 +0000 Subject: [PATCH 03/18] Use error value --- cmd/fs/cp.go | 6 +++++- cmd/fs/cp_test.go | 34 ++++++++++++++++++---------------- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/cmd/fs/cp.go b/cmd/fs/cp.go index a80ce53c74..24cb333aa7 100644 --- a/cmd/fs/cp.go +++ b/cmd/fs/cp.go @@ -21,6 +21,10 @@ import ( // Default number of concurrent file copy operations. const defaultConcurrency = 16 +// errInvalidConcurrency is returned when the value of the concurrency +// flag is invalid. +var errInvalidConcurrency = errors.New("--concurrency must be at least 1") + type copy struct { overwrite bool recursive bool @@ -197,7 +201,7 @@ func newCpCommand() *cobra.Command { cmd.PreRunE = func(cmd *cobra.Command, args []string) error { if c.concurrency <= 0 { - return fmt.Errorf("--concurrency must be at least 1") + return errInvalidConcurrency } return root.MustWorkspaceClient(cmd, args) } diff --git a/cmd/fs/cp_test.go b/cmd/fs/cp_test.go index 87c1170b7e..1d719368da 100644 --- a/cmd/fs/cp_test.go +++ b/cmd/fs/cp_test.go @@ -1,27 +1,29 @@ package fs import ( - "context" - "strings" + "errors" + "fmt" "testing" ) func TestCpConcurrencyValidation(t *testing.T) { - ctx := context.Background() - cmd := newCpCommand() - cmd.SetContext(ctx) - - // Test concurrency = 0 - cmd.SetArgs([]string{"src", "dst", "--concurrency", "0"}) - err := cmd.Execute() - if err == nil || !strings.Contains(err.Error(), "--concurrency must be at least 1") { - t.Errorf("expected error containing '--concurrency must be at least 1', got %v", err) + testCases := []struct { + concurrency int + wantError error + }{ + {-1337, errInvalidConcurrency}, + {-1, errInvalidConcurrency}, + {0, errInvalidConcurrency}, } - // Test concurrency = -1 - cmd.SetArgs([]string{"src", "dst", "--concurrency", "-1"}) - err = cmd.Execute() - if err == nil || !strings.Contains(err.Error(), "--concurrency must be at least 1") { - t.Errorf("expected error containing '--concurrency must be at least 1', got %v", err) + for _, tc := range testCases { + t.Run(fmt.Sprintf("concurrency=%d", tc.concurrency), func(t *testing.T) { + cmd := newCpCommand() + cmd.SetArgs([]string{"src", "dst", "--concurrency", fmt.Sprintf("%d", tc.concurrency)}) + err := cmd.Execute() + if !errors.Is(err, tc.wantError) { + t.Errorf("expected error %v, got %v", tc.wantError, err) + } + }) } } From d393388686c299298f588244c88dded8d202821c Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 11 Dec 2025 21:31:55 +0000 Subject: [PATCH 04/18] Cancel go routines --- cmd/fs/cp.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cmd/fs/cp.go b/cmd/fs/cp.go index 24cb333aa7..0bbdc2c631 100644 --- a/cmd/fs/cp.go +++ b/cmd/fs/cp.go @@ -52,11 +52,16 @@ func (c *copy) cpDirToDir(sourceDir, targetDir string) error { return fmt.Errorf("source path %s is a directory. Please specify the --recursive flag", sourceDir) } + // Create cancellable context to ensure cleanup and that all goroutines + // are stopped when the function exits on any error path. + ctx, cancel := context.WithCancel(c.ctx) + defer cancel() + // Pool of workers to process copy operations in parallel. - g, ctx := errgroup.WithContext(c.ctx) + g, ctx := errgroup.WithContext(ctx) g.SetLimit(c.concurrency) - // Walk the source directory, queueing copy operations for processing. + // Walk the source directory, queueing file copy operations for processing. sourceFs := filer.NewFS(c.ctx, c.sourceFiler) err := fs.WalkDir(sourceFs, sourceDir, func(sourcePath string, d fs.DirEntry, err error) error { if err != nil { From 14cfad9121748b281284e33b14665aa4609a6f8f Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 11 Dec 2025 21:37:22 +0000 Subject: [PATCH 05/18] Properly propagate context --- cmd/fs/cp.go | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/cmd/fs/cp.go b/cmd/fs/cp.go index 0bbdc2c631..6149b4c611 100644 --- a/cmd/fs/cp.go +++ b/cmd/fs/cp.go @@ -30,7 +30,6 @@ type copy struct { recursive bool concurrency int - ctx context.Context sourceFiler filer.Filer targetFiler filer.Filer sourceScheme string @@ -47,14 +46,15 @@ type copy struct { // The method does not take care of retrying on error; this is considered to // be the responsibility of the Filer implementation. If a file copy fails, // the error is returned and the other copies are cancelled. -func (c *copy) cpDirToDir(sourceDir, targetDir string) error { +func (c *copy) cpDirToDir(ctx context.Context, sourceDir, targetDir string) error { if !c.recursive { return fmt.Errorf("source path %s is a directory. Please specify the --recursive flag", sourceDir) } // Create cancellable context to ensure cleanup and that all goroutines - // are stopped when the function exits on any error path. - ctx, cancel := context.WithCancel(c.ctx) + // are stopped when the function exits on any error path (e.g. permission + // denied when walking the source directory). + ctx, cancel := context.WithCancel(ctx) defer cancel() // Pool of workers to process copy operations in parallel. @@ -62,7 +62,7 @@ func (c *copy) cpDirToDir(sourceDir, targetDir string) error { g.SetLimit(c.concurrency) // Walk the source directory, queueing file copy operations for processing. - sourceFs := filer.NewFS(c.ctx, c.sourceFiler) + sourceFs := filer.NewFS(ctx, c.sourceFiler) err := fs.WalkDir(sourceFs, sourceDir, func(sourcePath string, d fs.DirEntry, err error) error { if err != nil { return err @@ -82,7 +82,7 @@ func (c *copy) cpDirToDir(sourceDir, targetDir string) error { // are copied into it, and WalkDir guarantees directories are visited // before their contents. if d.IsDir() { - return c.targetFiler.Mkdir(c.ctx, targetPath) + return c.targetFiler.Mkdir(ctx, targetPath) } // Queue file copy operation for processing. @@ -90,7 +90,7 @@ func (c *copy) cpDirToDir(sourceDir, targetDir string) error { if ctx.Err() != nil { return ctx.Err() } - return c.cpFileToFile(sourcePath, targetPath) + return c.cpFileToFile(ctx, sourcePath, targetPath) }) return nil }) @@ -100,42 +100,42 @@ func (c *copy) cpDirToDir(sourceDir, targetDir string) error { return g.Wait() } -func (c *copy) cpFileToDir(sourcePath, targetDir string) error { +func (c *copy) cpFileToDir(ctx context.Context, sourcePath, targetDir string) error { fileName := filepath.Base(sourcePath) targetPath := path.Join(targetDir, fileName) - return c.cpFileToFile(sourcePath, targetPath) + return c.cpFileToFile(ctx, sourcePath, targetPath) } -func (c *copy) cpFileToFile(sourcePath, targetPath string) error { +func (c *copy) cpFileToFile(ctx context.Context, sourcePath, targetPath string) error { // Get reader for file at source path - r, err := c.sourceFiler.Read(c.ctx, sourcePath) + r, err := c.sourceFiler.Read(ctx, sourcePath) if err != nil { return err } defer r.Close() if c.overwrite { - err = c.targetFiler.Write(c.ctx, targetPath, r, filer.OverwriteIfExists) + err = c.targetFiler.Write(ctx, targetPath, r, filer.OverwriteIfExists) if err != nil { return err } } else { - err = c.targetFiler.Write(c.ctx, targetPath, r) + err = c.targetFiler.Write(ctx, targetPath, r) // skip if file already exists if err != nil && errors.Is(err, fs.ErrExist) { - return c.emitFileSkippedEvent(sourcePath, targetPath) + return c.emitFileSkippedEvent(ctx, sourcePath, targetPath) } if err != nil { return err } } - return c.emitFileCopiedEvent(sourcePath, targetPath) + return c.emitFileCopiedEvent(ctx, sourcePath, targetPath) } // TODO: emit these events on stderr // TODO: add integration tests for these events -func (c *copy) emitFileSkippedEvent(sourcePath, targetPath string) error { +func (c *copy) emitFileSkippedEvent(ctx context.Context, sourcePath, targetPath string) error { fullSourcePath := sourcePath if c.sourceScheme != "" { fullSourcePath = path.Join(c.sourceScheme+":", sourcePath) @@ -150,10 +150,10 @@ func (c *copy) emitFileSkippedEvent(sourcePath, targetPath string) error { c.mu.Lock() defer c.mu.Unlock() - return cmdio.RenderWithTemplate(c.ctx, event, "", template) + return cmdio.RenderWithTemplate(ctx, event, "", template) } -func (c *copy) emitFileCopiedEvent(sourcePath, targetPath string) error { +func (c *copy) emitFileCopiedEvent(ctx context.Context, sourcePath, targetPath string) error { fullSourcePath := sourcePath if c.sourceScheme != "" { fullSourcePath = path.Join(c.sourceScheme+":", sourcePath) @@ -168,7 +168,7 @@ func (c *copy) emitFileCopiedEvent(sourcePath, targetPath string) error { c.mu.Lock() defer c.mu.Unlock() - return cmdio.RenderWithTemplate(c.ctx, event, "", template) + return cmdio.RenderWithTemplate(ctx, event, "", template) } // hasTrailingDirSeparator checks if a path ends with a directory separator. @@ -237,7 +237,6 @@ func newCpCommand() *cobra.Command { c.targetScheme = "dbfs" } - c.ctx = ctx c.sourceFiler = sourceFiler c.targetFiler = targetFiler @@ -249,7 +248,7 @@ func newCpCommand() *cobra.Command { // case 1: source path is a directory, then recursively create files at target path if sourceInfo.IsDir() { - return c.cpDirToDir(sourcePath, targetPath) + return c.cpDirToDir(ctx, sourcePath, targetPath) } // If target path has a trailing separator, trim it and let case 2 handle it @@ -260,11 +259,11 @@ func newCpCommand() *cobra.Command { // case 2: source path is a file, and target path is a directory. In this case // we copy the file to inside the directory if targetInfo, err := targetFiler.Stat(ctx, targetPath); err == nil && targetInfo.IsDir() { - return c.cpFileToDir(sourcePath, targetPath) + return c.cpFileToDir(ctx, sourcePath, targetPath) } // case 3: source path is a file, and target path is a file - return c.cpFileToFile(sourcePath, targetPath) + return c.cpFileToFile(ctx, sourcePath, targetPath) } v := newValidArgs() From 2a9ec1bd406c9f84b2b15c74671b334116859c9d Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 11 Dec 2025 21:41:29 +0000 Subject: [PATCH 06/18] Linter --- cmd/fs/cp_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/fs/cp_test.go b/cmd/fs/cp_test.go index 1d719368da..bf8930051c 100644 --- a/cmd/fs/cp_test.go +++ b/cmd/fs/cp_test.go @@ -3,6 +3,7 @@ package fs import ( "errors" "fmt" + "strconv" "testing" ) @@ -19,7 +20,7 @@ func TestCpConcurrencyValidation(t *testing.T) { for _, tc := range testCases { t.Run(fmt.Sprintf("concurrency=%d", tc.concurrency), func(t *testing.T) { cmd := newCpCommand() - cmd.SetArgs([]string{"src", "dst", "--concurrency", fmt.Sprintf("%d", tc.concurrency)}) + cmd.SetArgs([]string{"src", "dst", "--concurrency", strconv.Itoa(tc.concurrency)}) err := cmd.Execute() if !errors.Is(err, tc.wantError) { t.Errorf("expected error %v, got %v", tc.wantError, err) From 179a9443be3602c3e8e5708617fb834bf5d28733 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 12 Dec 2025 08:23:52 +0000 Subject: [PATCH 07/18] Test context cancellation --- cmd/fs/cp_test.go | 193 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 192 insertions(+), 1 deletion(-) diff --git a/cmd/fs/cp_test.go b/cmd/fs/cp_test.go index bf8930051c..98de33c74c 100644 --- a/cmd/fs/cp_test.go +++ b/cmd/fs/cp_test.go @@ -1,13 +1,20 @@ package fs import ( + "context" "errors" "fmt" + "io" + "io/fs" "strconv" + "strings" "testing" + "time" + + "github.com/databricks/cli/libs/filer" ) -func TestCpConcurrencyValidation(t *testing.T) { +func TestCp_concurrencyValidation(t *testing.T) { testCases := []struct { concurrency int wantError error @@ -28,3 +35,187 @@ func TestCpConcurrencyValidation(t *testing.T) { }) } } + +// mockFiler mocks filer.Filer. +type mockFiler struct { + write func(ctx context.Context, path string, r io.Reader, mode ...filer.WriteMode) error + read func(ctx context.Context, path string) (io.ReadCloser, error) + delete func(ctx context.Context, path string, mode ...filer.DeleteMode) error + readDir func(ctx context.Context, path string) ([]fs.DirEntry, error) + mkdir func(ctx context.Context, path string) error + stat func(ctx context.Context, path string) (fs.FileInfo, error) +} + +func (m *mockFiler) Write(ctx context.Context, path string, r io.Reader, mode ...filer.WriteMode) error { + if m.write == nil { + return nil + } + return m.write(ctx, path, r, mode...) +} + +func (m *mockFiler) Read(ctx context.Context, path string) (io.ReadCloser, error) { + if m.read == nil { + return nil, nil + } + return m.read(ctx, path) +} + +func (m *mockFiler) Delete(ctx context.Context, path string, mode ...filer.DeleteMode) error { + if m.delete == nil { + return nil + } + return m.delete(ctx, path, mode...) +} + +func (m *mockFiler) ReadDir(ctx context.Context, path string) ([]fs.DirEntry, error) { + if m.readDir == nil { + return nil, nil + } + return m.readDir(ctx, path) +} + +func (m *mockFiler) Mkdir(ctx context.Context, path string) error { + if m.mkdir == nil { + return nil + } + return m.mkdir(ctx, path) +} + +func (m *mockFiler) Stat(ctx context.Context, path string) (fs.FileInfo, error) { + if m.stat == nil { + return nil, nil + } + return m.stat(ctx, path) +} + +// mockFileInfo mocks fs.FileInfo. +type mockFileInfo struct { + name string + isDir bool +} + +func (m mockFileInfo) Name() string { return m.name } +func (m mockFileInfo) Size() int64 { return 0 } +func (m mockFileInfo) Mode() fs.FileMode { return 0o644 } +func (m mockFileInfo) ModTime() time.Time { return time.Time{} } +func (m mockFileInfo) IsDir() bool { return m.isDir } +func (m mockFileInfo) Sys() any { return nil } + +// mockDirEntry mocks fs.DirEntry. +type mockDirEntry struct { + name string + isDir bool +} + +func (m mockDirEntry) Name() string { return m.name } +func (m mockDirEntry) IsDir() bool { return m.isDir } +func (m mockDirEntry) Type() fs.FileMode { return 0 } +func (m mockDirEntry) Info() (fs.FileInfo, error) { + return mockFileInfo(m), nil +} + +func TestCp_cpDirToDir_contextCancellation(t *testing.T) { + testError := errors.New("test error") + + // Mock the stats and readDir methods for a Filer over a file system that + // has the following directory structure: + // + // src/ + // ├── subdir/ + // ├── file1.txt + // ├── file2.txt + // └── file3.txt + // + mockSourceStat := func(ctx context.Context, path string) (fs.FileInfo, error) { + isDir := path == "src" || path == "src/subdir" + return mockFileInfo{name: path, isDir: isDir}, nil + } + mockSourceReadDir := func(ctx context.Context, path string) ([]fs.DirEntry, error) { + if path == "src" { + return []fs.DirEntry{ + mockDirEntry{name: "subdir", isDir: true}, + mockDirEntry{name: "file1.txt", isDir: false}, + mockDirEntry{name: "file2.txt", isDir: false}, + mockDirEntry{name: "file3.txt", isDir: false}, + }, nil + } + return nil, nil + } + + testCases := []struct { + desc string + c *copy + wantErr error + }{ + { + // The source filer's Read method blocks until context is cancelled, + // simulating a slow file copy operation. The target filer's Mkdir + // method returns an error which should cancel the walk and all file + // copy goroutines. + desc: "cancel go routines on walk error", + c: ©{ + recursive: true, + concurrency: 5, + sourceFiler: &mockFiler{ + stat: mockSourceStat, + readDir: mockSourceReadDir, + read: func(ctx context.Context, path string) (io.ReadCloser, error) { + <-ctx.Done() // block until context is cancelled + return nil, ctx.Err() + }, + }, + targetFiler: &mockFiler{ + mkdir: func(ctx context.Context, path string) error { + return testError + }, + }, + }, + wantErr: testError, + }, + { + // The target filer's Write method returns an error when writing the + // file1.txt file. This error is expected to be returned by the file copy + // goroutine and all other file copy goroutines should be cancelled. + desc: "cancel go routines on file copy error", + c: ©{ + recursive: true, + concurrency: 5, + sourceFiler: &mockFiler{ + stat: mockSourceStat, + readDir: mockSourceReadDir, + read: func(ctx context.Context, path string) (io.ReadCloser, error) { + return io.NopCloser(strings.NewReader("content")), nil + }, + }, + targetFiler: &mockFiler{ + write: func(ctx context.Context, path string, r io.Reader, mode ...filer.WriteMode) error { + if path == "dst/file1.txt" { + return testError + } + <-ctx.Done() // block until context is cancelled + return ctx.Err() + }, + }, + }, + wantErr: testError, + }, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + done := make(chan error, 1) + go func() { + done <- tc.c.cpDirToDir(t.Context(), "src", "dst") + }() + + select { + case gotErr := <-done: + if !errors.Is(gotErr, tc.wantErr) { + t.Errorf("want error %v, got %v", tc.wantErr, gotErr) + } + case <-time.After(3 * time.Second): // do not wait too long in case of test issues + t.Fatal("cpDirToDir blocked instead of returning error immediately") + } + }) + } +} From 79dd600cbdfed80c2b2b1a79b1c9c6cb08bfd739 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 12 Dec 2025 12:30:25 +0000 Subject: [PATCH 08/18] Clarified rationale --- cmd/fs/cp.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/cmd/fs/cp.go b/cmd/fs/cp.go index 6149b4c611..333c550d37 100644 --- a/cmd/fs/cp.go +++ b/cmd/fs/cp.go @@ -38,7 +38,7 @@ type copy struct { mu sync.Mutex // protect output from concurrent writes } -// cpDirToDir recursively copies the contents of a directory to another +// cpDirToDir recursively copies the content of a directory to another // directory. // // There is no guarantee on the order in which the files are copied. @@ -51,13 +51,15 @@ func (c *copy) cpDirToDir(ctx context.Context, sourceDir, targetDir string) erro return fmt.Errorf("source path %s is a directory. Please specify the --recursive flag", sourceDir) } - // Create cancellable context to ensure cleanup and that all goroutines - // are stopped when the function exits on any error path (e.g. permission - // denied when walking the source directory). + // Create a cancellable context purely for the purpose of having a way to + // cancel the goroutines in case of error walking the directory. ctx, cancel := context.WithCancel(ctx) defer cancel() - // Pool of workers to process copy operations in parallel. + // Pool of workers to process copy operations in parallel. The created + // context is the real context for this operation. It is shared by the + // walking function and the goroutines and can be cancelled manually + // by calling the cancel() function of its parent context. g, ctx := errgroup.WithContext(ctx) g.SetLimit(c.concurrency) @@ -85,8 +87,10 @@ func (c *copy) cpDirToDir(ctx context.Context, sourceDir, targetDir string) erro return c.targetFiler.Mkdir(ctx, targetPath) } - // Queue file copy operation for processing. g.Go(func() error { + // Goroutines are queued and may start after the context is already + // cancelled (e.g. a prior copy failed). This check aims to avoid + // starting work that will inevitably fail. if ctx.Err() != nil { return ctx.Err() } From 2d1f19da1533c741fa61fd323b7ce731f0ee348c Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 12 Dec 2025 12:54:48 +0000 Subject: [PATCH 09/18] Gracefully wait for cancellation to complete --- cmd/fs/cp.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cmd/fs/cp.go b/cmd/fs/cp.go index 333c550d37..f34cf2882e 100644 --- a/cmd/fs/cp.go +++ b/cmd/fs/cp.go @@ -99,7 +99,9 @@ func (c *copy) cpDirToDir(ctx context.Context, sourceDir, targetDir string) erro return nil }) if err != nil { - return err + cancel() // cancel the goroutines + _ = g.Wait() // wait for the goroutines to finish + return err // return the "real" error that led to cancellation } return g.Wait() } From 12dec5c902d9b074a3d976b82c258d39ad787726 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 15 Dec 2025 09:01:36 +0000 Subject: [PATCH 10/18] Add acceptance tests for fs cp --- acceptance/cmd/fs/cp/dir-to-dir/out.test.toml | 5 +++ acceptance/cmd/fs/cp/dir-to-dir/output.txt | 2 ++ acceptance/cmd/fs/cp/dir-to-dir/script | 6 ++++ acceptance/cmd/fs/cp/dir-to-dir/test.toml | 20 +++++++++++ .../cmd/fs/cp/file-to-dir/out.test.toml | 5 +++ acceptance/cmd/fs/cp/file-to-dir/output.txt | 3 ++ acceptance/cmd/fs/cp/file-to-dir/script | 4 +++ acceptance/cmd/fs/cp/file-to-dir/test.toml | 12 +++++++ .../cmd/fs/cp/file-to-file/out.test.toml | 5 +++ acceptance/cmd/fs/cp/file-to-file/output.txt | 7 ++++ acceptance/cmd/fs/cp/file-to-file/script | 9 +++++ acceptance/cmd/fs/cp/file-to-file/test.toml | 34 +++++++++++++++++++ .../cmd/fs/cp/input-validation/out.test.toml | 5 +++ .../cmd/fs/cp/input-validation/output.txt | 10 ++++++ acceptance/cmd/fs/cp/input-validation/script | 3 ++ .../cmd/fs/cp/input-validation/test.toml | 2 ++ cmd/fs/cp.go | 6 ++-- cmd/fs/cp_test.go | 24 ------------- 18 files changed, 136 insertions(+), 26 deletions(-) create mode 100644 acceptance/cmd/fs/cp/dir-to-dir/out.test.toml create mode 100644 acceptance/cmd/fs/cp/dir-to-dir/output.txt create mode 100644 acceptance/cmd/fs/cp/dir-to-dir/script create mode 100644 acceptance/cmd/fs/cp/dir-to-dir/test.toml create mode 100644 acceptance/cmd/fs/cp/file-to-dir/out.test.toml create mode 100644 acceptance/cmd/fs/cp/file-to-dir/output.txt create mode 100644 acceptance/cmd/fs/cp/file-to-dir/script create mode 100644 acceptance/cmd/fs/cp/file-to-dir/test.toml create mode 100644 acceptance/cmd/fs/cp/file-to-file/out.test.toml create mode 100644 acceptance/cmd/fs/cp/file-to-file/output.txt create mode 100644 acceptance/cmd/fs/cp/file-to-file/script create mode 100644 acceptance/cmd/fs/cp/file-to-file/test.toml create mode 100644 acceptance/cmd/fs/cp/input-validation/out.test.toml create mode 100644 acceptance/cmd/fs/cp/input-validation/output.txt create mode 100644 acceptance/cmd/fs/cp/input-validation/script create mode 100644 acceptance/cmd/fs/cp/input-validation/test.toml diff --git a/acceptance/cmd/fs/cp/dir-to-dir/out.test.toml b/acceptance/cmd/fs/cp/dir-to-dir/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/cmd/fs/cp/dir-to-dir/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/cmd/fs/cp/dir-to-dir/output.txt b/acceptance/cmd/fs/cp/dir-to-dir/output.txt new file mode 100644 index 0000000000..9b708fb9f0 --- /dev/null +++ b/acceptance/cmd/fs/cp/dir-to-dir/output.txt @@ -0,0 +1,2 @@ +localdir/file1.txt -> dbfs:/Volumes/main/default/data/uploaded-dir/file1.txt +localdir/file2.txt -> dbfs:/Volumes/main/default/data/uploaded-dir/file2.txt diff --git a/acceptance/cmd/fs/cp/dir-to-dir/script b/acceptance/cmd/fs/cp/dir-to-dir/script new file mode 100644 index 0000000000..6f0b8510b0 --- /dev/null +++ b/acceptance/cmd/fs/cp/dir-to-dir/script @@ -0,0 +1,6 @@ +mkdir -p localdir +echo -n "file1 content" > localdir/file1.txt +echo -n "file2 content" > localdir/file2.txt + +# Recursive directory copy (output sorted for deterministic ordering). +$CLI fs cp -r localdir dbfs:/Volumes/main/default/data/uploaded-dir 2>&1 | sort diff --git a/acceptance/cmd/fs/cp/dir-to-dir/test.toml b/acceptance/cmd/fs/cp/dir-to-dir/test.toml new file mode 100644 index 0000000000..10da309c5b --- /dev/null +++ b/acceptance/cmd/fs/cp/dir-to-dir/test.toml @@ -0,0 +1,20 @@ +Local = true +Cloud = false +Ignore = ["localdir"] + +# Recursive copy: localdir/ -> uploaded-dir/. +[[Server]] +Pattern = "PUT /api/2.0/fs/directories/Volumes/main/default/data/uploaded-dir" +Response.StatusCode = 200 + +[[Server]] +Pattern = "HEAD /api/2.0/fs/directories/Volumes/main/default/data/uploaded-dir" +Response.StatusCode = 200 + +[[Server]] +Pattern = "PUT /api/2.0/fs/files/Volumes/main/default/data/uploaded-dir/file1.txt" +Response.StatusCode = 200 + +[[Server]] +Pattern = "PUT /api/2.0/fs/files/Volumes/main/default/data/uploaded-dir/file2.txt" +Response.StatusCode = 200 diff --git a/acceptance/cmd/fs/cp/file-to-dir/out.test.toml b/acceptance/cmd/fs/cp/file-to-dir/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/cmd/fs/cp/file-to-dir/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/cmd/fs/cp/file-to-dir/output.txt b/acceptance/cmd/fs/cp/file-to-dir/output.txt new file mode 100644 index 0000000000..1bbd3d2cad --- /dev/null +++ b/acceptance/cmd/fs/cp/file-to-dir/output.txt @@ -0,0 +1,3 @@ + +>>> [CLI] fs cp local.txt dbfs:/Volumes/main/default/data/mydir/ +local.txt -> dbfs:/Volumes/main/default/data/mydir/local.txt diff --git a/acceptance/cmd/fs/cp/file-to-dir/script b/acceptance/cmd/fs/cp/file-to-dir/script new file mode 100644 index 0000000000..d21baf28bd --- /dev/null +++ b/acceptance/cmd/fs/cp/file-to-dir/script @@ -0,0 +1,4 @@ +echo -n "hello world!" > local.txt + +# Copy file into a directory (trailing slash indicates directory target). +trace $CLI fs cp local.txt dbfs:/Volumes/main/default/data/mydir/ diff --git a/acceptance/cmd/fs/cp/file-to-dir/test.toml b/acceptance/cmd/fs/cp/file-to-dir/test.toml new file mode 100644 index 0000000000..d8c7892808 --- /dev/null +++ b/acceptance/cmd/fs/cp/file-to-dir/test.toml @@ -0,0 +1,12 @@ +Local = true +Cloud = false +Ignore = ["local.txt"] + +# Copy file into existing directory: local.txt -> mydir/local.txt. +[[Server]] +Pattern = "HEAD /api/2.0/fs/directories/Volumes/main/default/data/mydir" +Response.StatusCode = 200 + +[[Server]] +Pattern = "PUT /api/2.0/fs/files/Volumes/main/default/data/mydir/local.txt" +Response.StatusCode = 200 diff --git a/acceptance/cmd/fs/cp/file-to-file/out.test.toml b/acceptance/cmd/fs/cp/file-to-file/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/cmd/fs/cp/file-to-file/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/cmd/fs/cp/file-to-file/output.txt b/acceptance/cmd/fs/cp/file-to-file/output.txt new file mode 100644 index 0000000000..93b9425293 --- /dev/null +++ b/acceptance/cmd/fs/cp/file-to-file/output.txt @@ -0,0 +1,7 @@ + +>>> [CLI] fs cp local.txt dbfs:/Volumes/main/default/data/uploaded.txt +local.txt -> dbfs:/Volumes/main/default/data/uploaded.txt + +>>> [CLI] fs cp dbfs:/Volumes/main/default/data/remote.txt downloaded.txt +dbfs:/Volumes/main/default/data/remote.txt -> downloaded.txt +content from volume \ No newline at end of file diff --git a/acceptance/cmd/fs/cp/file-to-file/script b/acceptance/cmd/fs/cp/file-to-file/script new file mode 100644 index 0000000000..510ec04618 --- /dev/null +++ b/acceptance/cmd/fs/cp/file-to-file/script @@ -0,0 +1,9 @@ +echo -n "hello world!" > local.txt + +# Upload local file to volume. +trace $CLI fs cp local.txt dbfs:/Volumes/main/default/data/uploaded.txt + +# Download file from volume to local. +trace $CLI fs cp dbfs:/Volumes/main/default/data/remote.txt downloaded.txt + +cat downloaded.txt diff --git a/acceptance/cmd/fs/cp/file-to-file/test.toml b/acceptance/cmd/fs/cp/file-to-file/test.toml new file mode 100644 index 0000000000..e083bbfdbc --- /dev/null +++ b/acceptance/cmd/fs/cp/file-to-file/test.toml @@ -0,0 +1,34 @@ +Local = true +Cloud = false +Ignore = ["local.txt", "downloaded.txt"] + +# Upload: local.txt -> dbfs:/Volumes/.../uploaded.txt. +[[Server]] +Pattern = "HEAD /api/2.0/fs/directories/Volumes/main/default/data/uploaded.txt" +Response.StatusCode = 404 + +[[Server]] +Pattern = "HEAD /api/2.0/fs/files/Volumes/main/default/data/uploaded.txt" +Response.StatusCode = 404 + +[[Server]] +Pattern = "HEAD /api/2.0/fs/directories/Volumes/main/default/data" +Response.StatusCode = 200 + +[[Server]] +Pattern = "PUT /api/2.0/fs/files/Volumes/main/default/data/uploaded.txt" +Response.StatusCode = 200 + +# Download: dbfs:/Volumes/.../remote.txt -> downloaded.txt. +[[Server]] +Pattern = "HEAD /api/2.0/fs/directories/Volumes/main/default/data/remote.txt" +Response.StatusCode = 404 + +[[Server]] +Pattern = "HEAD /api/2.0/fs/files/Volumes/main/default/data/remote.txt" +Response.StatusCode = 200 + +[[Server]] +Pattern = "GET /api/2.0/fs/files/Volumes/main/default/data/remote.txt" +Response.StatusCode = 200 +Response.Body = "content from volume" diff --git a/acceptance/cmd/fs/cp/input-validation/out.test.toml b/acceptance/cmd/fs/cp/input-validation/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/cmd/fs/cp/input-validation/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/cmd/fs/cp/input-validation/output.txt b/acceptance/cmd/fs/cp/input-validation/output.txt new file mode 100644 index 0000000000..febe55b74e --- /dev/null +++ b/acceptance/cmd/fs/cp/input-validation/output.txt @@ -0,0 +1,10 @@ + +>>> errcode [CLI] fs cp src dst --concurrency -1 +Error: --concurrency must be at least 1 + +Exit code: 1 + +>>> errcode [CLI] fs cp src dst --concurrency 0 +Error: --concurrency must be at least 1 + +Exit code: 1 diff --git a/acceptance/cmd/fs/cp/input-validation/script b/acceptance/cmd/fs/cp/input-validation/script new file mode 100644 index 0000000000..a5e8cec862 --- /dev/null +++ b/acceptance/cmd/fs/cp/input-validation/script @@ -0,0 +1,3 @@ +# Invalid concurrency values should fail. +trace errcode $CLI fs cp src dst --concurrency -1 +trace errcode $CLI fs cp src dst --concurrency 0 diff --git a/acceptance/cmd/fs/cp/input-validation/test.toml b/acceptance/cmd/fs/cp/input-validation/test.toml new file mode 100644 index 0000000000..7d36fb9dc1 --- /dev/null +++ b/acceptance/cmd/fs/cp/input-validation/test.toml @@ -0,0 +1,2 @@ +Local = true +Cloud = false diff --git a/cmd/fs/cp.go b/cmd/fs/cp.go index f34cf2882e..d7ae651753 100644 --- a/cmd/fs/cp.go +++ b/cmd/fs/cp.go @@ -18,8 +18,10 @@ import ( "golang.org/x/sync/errgroup" ) -// Default number of concurrent file copy operations. -const defaultConcurrency = 16 +// Default number of concurrent file copy operations. This is a conservative +// default that should be sufficient to fully utilize the available bandwidth +// in most cases. +const defaultConcurrency = 8 // errInvalidConcurrency is returned when the value of the concurrency // flag is invalid. diff --git a/cmd/fs/cp_test.go b/cmd/fs/cp_test.go index 98de33c74c..b50ee0658f 100644 --- a/cmd/fs/cp_test.go +++ b/cmd/fs/cp_test.go @@ -3,10 +3,8 @@ package fs import ( "context" "errors" - "fmt" "io" "io/fs" - "strconv" "strings" "testing" "time" @@ -14,28 +12,6 @@ import ( "github.com/databricks/cli/libs/filer" ) -func TestCp_concurrencyValidation(t *testing.T) { - testCases := []struct { - concurrency int - wantError error - }{ - {-1337, errInvalidConcurrency}, - {-1, errInvalidConcurrency}, - {0, errInvalidConcurrency}, - } - - for _, tc := range testCases { - t.Run(fmt.Sprintf("concurrency=%d", tc.concurrency), func(t *testing.T) { - cmd := newCpCommand() - cmd.SetArgs([]string{"src", "dst", "--concurrency", strconv.Itoa(tc.concurrency)}) - err := cmd.Execute() - if !errors.Is(err, tc.wantError) { - t.Errorf("expected error %v, got %v", tc.wantError, err) - } - }) - } -} - // mockFiler mocks filer.Filer. type mockFiler struct { write func(ctx context.Context, path string, r io.Reader, mode ...filer.WriteMode) error From 6a8077b151590452f6196fb2306a30d3eb797a98 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 8 Jan 2026 14:36:43 +0000 Subject: [PATCH 11/18] Address feedback --- .../cmd/fs/cp/dir-to-dir/localdir/file1.txt | 1 + .../cmd/fs/cp/dir-to-dir/localdir/file2.txt | 1 + acceptance/cmd/fs/cp/dir-to-dir/output.txt | 7 ++ acceptance/cmd/fs/cp/dir-to-dir/script | 11 +-- acceptance/cmd/fs/cp/dir-to-dir/test.toml | 18 ----- acceptance/cmd/fs/cp/file-to-dir/local.txt | 1 + acceptance/cmd/fs/cp/file-to-dir/output.txt | 3 + acceptance/cmd/fs/cp/file-to-dir/script | 6 +- acceptance/cmd/fs/cp/file-to-dir/test.toml | 10 --- acceptance/cmd/fs/cp/file-to-file/local.txt | 1 + acceptance/cmd/fs/cp/file-to-file/output.txt | 10 ++- acceptance/cmd/fs/cp/file-to-file/script | 14 ++-- acceptance/cmd/fs/cp/file-to-file/test.toml | 32 --------- libs/testserver/fake_workspace.go | 24 +++++++ libs/testserver/handlers.go | 71 ++++++++++++++++++- 15 files changed, 136 insertions(+), 74 deletions(-) create mode 100644 acceptance/cmd/fs/cp/dir-to-dir/localdir/file1.txt create mode 100644 acceptance/cmd/fs/cp/dir-to-dir/localdir/file2.txt create mode 100644 acceptance/cmd/fs/cp/file-to-dir/local.txt create mode 100644 acceptance/cmd/fs/cp/file-to-file/local.txt diff --git a/acceptance/cmd/fs/cp/dir-to-dir/localdir/file1.txt b/acceptance/cmd/fs/cp/dir-to-dir/localdir/file1.txt new file mode 100644 index 0000000000..2e80f50e92 --- /dev/null +++ b/acceptance/cmd/fs/cp/dir-to-dir/localdir/file1.txt @@ -0,0 +1 @@ +file1 content \ No newline at end of file diff --git a/acceptance/cmd/fs/cp/dir-to-dir/localdir/file2.txt b/acceptance/cmd/fs/cp/dir-to-dir/localdir/file2.txt new file mode 100644 index 0000000000..79cd8cc836 --- /dev/null +++ b/acceptance/cmd/fs/cp/dir-to-dir/localdir/file2.txt @@ -0,0 +1 @@ +file2 content \ No newline at end of file diff --git a/acceptance/cmd/fs/cp/dir-to-dir/output.txt b/acceptance/cmd/fs/cp/dir-to-dir/output.txt index 9b708fb9f0..47387471a8 100644 --- a/acceptance/cmd/fs/cp/dir-to-dir/output.txt +++ b/acceptance/cmd/fs/cp/dir-to-dir/output.txt @@ -1,2 +1,9 @@ + +>>> [CLI] fs cp -r localdir dbfs:/Volumes/main/default/data/uploaded-dir localdir/file1.txt -> dbfs:/Volumes/main/default/data/uploaded-dir/file1.txt localdir/file2.txt -> dbfs:/Volumes/main/default/data/uploaded-dir/file2.txt + +>>> [CLI] fs cat dbfs:/Volumes/main/default/data/uploaded-dir/file1.txt +file1 content +>>> [CLI] fs cat dbfs:/Volumes/main/default/data/uploaded-dir/file2.txt +file2 content \ No newline at end of file diff --git a/acceptance/cmd/fs/cp/dir-to-dir/script b/acceptance/cmd/fs/cp/dir-to-dir/script index 6f0b8510b0..e98f6fe5be 100644 --- a/acceptance/cmd/fs/cp/dir-to-dir/script +++ b/acceptance/cmd/fs/cp/dir-to-dir/script @@ -1,6 +1,9 @@ -mkdir -p localdir -echo -n "file1 content" > localdir/file1.txt -echo -n "file2 content" > localdir/file2.txt +# Create parent directory. +$CLI fs mkdir dbfs:/Volumes/main/default/data # Recursive directory copy (output sorted for deterministic ordering). -$CLI fs cp -r localdir dbfs:/Volumes/main/default/data/uploaded-dir 2>&1 | sort +trace $CLI fs cp -r localdir dbfs:/Volumes/main/default/data/uploaded-dir 2>&1 | sort + +# Verify files were uploaded correctly. +trace $CLI fs cat dbfs:/Volumes/main/default/data/uploaded-dir/file1.txt +trace $CLI fs cat dbfs:/Volumes/main/default/data/uploaded-dir/file2.txt diff --git a/acceptance/cmd/fs/cp/dir-to-dir/test.toml b/acceptance/cmd/fs/cp/dir-to-dir/test.toml index 10da309c5b..7d36fb9dc1 100644 --- a/acceptance/cmd/fs/cp/dir-to-dir/test.toml +++ b/acceptance/cmd/fs/cp/dir-to-dir/test.toml @@ -1,20 +1,2 @@ Local = true Cloud = false -Ignore = ["localdir"] - -# Recursive copy: localdir/ -> uploaded-dir/. -[[Server]] -Pattern = "PUT /api/2.0/fs/directories/Volumes/main/default/data/uploaded-dir" -Response.StatusCode = 200 - -[[Server]] -Pattern = "HEAD /api/2.0/fs/directories/Volumes/main/default/data/uploaded-dir" -Response.StatusCode = 200 - -[[Server]] -Pattern = "PUT /api/2.0/fs/files/Volumes/main/default/data/uploaded-dir/file1.txt" -Response.StatusCode = 200 - -[[Server]] -Pattern = "PUT /api/2.0/fs/files/Volumes/main/default/data/uploaded-dir/file2.txt" -Response.StatusCode = 200 diff --git a/acceptance/cmd/fs/cp/file-to-dir/local.txt b/acceptance/cmd/fs/cp/file-to-dir/local.txt new file mode 100644 index 0000000000..bc7774a7b1 --- /dev/null +++ b/acceptance/cmd/fs/cp/file-to-dir/local.txt @@ -0,0 +1 @@ +hello world! \ No newline at end of file diff --git a/acceptance/cmd/fs/cp/file-to-dir/output.txt b/acceptance/cmd/fs/cp/file-to-dir/output.txt index 1bbd3d2cad..3247705e69 100644 --- a/acceptance/cmd/fs/cp/file-to-dir/output.txt +++ b/acceptance/cmd/fs/cp/file-to-dir/output.txt @@ -1,3 +1,6 @@ >>> [CLI] fs cp local.txt dbfs:/Volumes/main/default/data/mydir/ local.txt -> dbfs:/Volumes/main/default/data/mydir/local.txt + +>>> [CLI] fs cat dbfs:/Volumes/main/default/data/mydir/local.txt +hello world! \ No newline at end of file diff --git a/acceptance/cmd/fs/cp/file-to-dir/script b/acceptance/cmd/fs/cp/file-to-dir/script index d21baf28bd..e9c81e2957 100644 --- a/acceptance/cmd/fs/cp/file-to-dir/script +++ b/acceptance/cmd/fs/cp/file-to-dir/script @@ -1,4 +1,8 @@ -echo -n "hello world!" > local.txt +# Create target directory. +$CLI fs mkdir dbfs:/Volumes/main/default/data/mydir # Copy file into a directory (trailing slash indicates directory target). trace $CLI fs cp local.txt dbfs:/Volumes/main/default/data/mydir/ + +# Verify file was uploaded correctly. +trace $CLI fs cat dbfs:/Volumes/main/default/data/mydir/local.txt diff --git a/acceptance/cmd/fs/cp/file-to-dir/test.toml b/acceptance/cmd/fs/cp/file-to-dir/test.toml index d8c7892808..7d36fb9dc1 100644 --- a/acceptance/cmd/fs/cp/file-to-dir/test.toml +++ b/acceptance/cmd/fs/cp/file-to-dir/test.toml @@ -1,12 +1,2 @@ Local = true Cloud = false -Ignore = ["local.txt"] - -# Copy file into existing directory: local.txt -> mydir/local.txt. -[[Server]] -Pattern = "HEAD /api/2.0/fs/directories/Volumes/main/default/data/mydir" -Response.StatusCode = 200 - -[[Server]] -Pattern = "PUT /api/2.0/fs/files/Volumes/main/default/data/mydir/local.txt" -Response.StatusCode = 200 diff --git a/acceptance/cmd/fs/cp/file-to-file/local.txt b/acceptance/cmd/fs/cp/file-to-file/local.txt new file mode 100644 index 0000000000..bc7774a7b1 --- /dev/null +++ b/acceptance/cmd/fs/cp/file-to-file/local.txt @@ -0,0 +1 @@ +hello world! \ No newline at end of file diff --git a/acceptance/cmd/fs/cp/file-to-file/output.txt b/acceptance/cmd/fs/cp/file-to-file/output.txt index 93b9425293..60ca172f6e 100644 --- a/acceptance/cmd/fs/cp/file-to-file/output.txt +++ b/acceptance/cmd/fs/cp/file-to-file/output.txt @@ -2,6 +2,10 @@ >>> [CLI] fs cp local.txt dbfs:/Volumes/main/default/data/uploaded.txt local.txt -> dbfs:/Volumes/main/default/data/uploaded.txt ->>> [CLI] fs cp dbfs:/Volumes/main/default/data/remote.txt downloaded.txt -dbfs:/Volumes/main/default/data/remote.txt -> downloaded.txt -content from volume \ No newline at end of file +>>> [CLI] fs cat dbfs:/Volumes/main/default/data/uploaded.txt +hello world! +>>> [CLI] fs cp dbfs:/Volumes/main/default/data/uploaded.txt downloaded.txt +dbfs:/Volumes/main/default/data/uploaded.txt -> downloaded.txt + +>>> cat downloaded.txt +hello world! \ No newline at end of file diff --git a/acceptance/cmd/fs/cp/file-to-file/script b/acceptance/cmd/fs/cp/file-to-file/script index 510ec04618..276b63082c 100644 --- a/acceptance/cmd/fs/cp/file-to-file/script +++ b/acceptance/cmd/fs/cp/file-to-file/script @@ -1,9 +1,15 @@ -echo -n "hello world!" > local.txt +# Create parent directory. +$CLI fs mkdir dbfs:/Volumes/main/default/data # Upload local file to volume. trace $CLI fs cp local.txt dbfs:/Volumes/main/default/data/uploaded.txt -# Download file from volume to local. -trace $CLI fs cp dbfs:/Volumes/main/default/data/remote.txt downloaded.txt +# Verify file was uploaded correctly. +trace $CLI fs cat dbfs:/Volumes/main/default/data/uploaded.txt -cat downloaded.txt +# Download the same file back to verify round-trip. +trace $CLI fs cp dbfs:/Volumes/main/default/data/uploaded.txt downloaded.txt + +# Verify downloaded content matches original. +trace cat downloaded.txt +rm downloaded.txt diff --git a/acceptance/cmd/fs/cp/file-to-file/test.toml b/acceptance/cmd/fs/cp/file-to-file/test.toml index e083bbfdbc..7d36fb9dc1 100644 --- a/acceptance/cmd/fs/cp/file-to-file/test.toml +++ b/acceptance/cmd/fs/cp/file-to-file/test.toml @@ -1,34 +1,2 @@ Local = true Cloud = false -Ignore = ["local.txt", "downloaded.txt"] - -# Upload: local.txt -> dbfs:/Volumes/.../uploaded.txt. -[[Server]] -Pattern = "HEAD /api/2.0/fs/directories/Volumes/main/default/data/uploaded.txt" -Response.StatusCode = 404 - -[[Server]] -Pattern = "HEAD /api/2.0/fs/files/Volumes/main/default/data/uploaded.txt" -Response.StatusCode = 404 - -[[Server]] -Pattern = "HEAD /api/2.0/fs/directories/Volumes/main/default/data" -Response.StatusCode = 200 - -[[Server]] -Pattern = "PUT /api/2.0/fs/files/Volumes/main/default/data/uploaded.txt" -Response.StatusCode = 200 - -# Download: dbfs:/Volumes/.../remote.txt -> downloaded.txt. -[[Server]] -Pattern = "HEAD /api/2.0/fs/directories/Volumes/main/default/data/remote.txt" -Response.StatusCode = 404 - -[[Server]] -Pattern = "HEAD /api/2.0/fs/files/Volumes/main/default/data/remote.txt" -Response.StatusCode = 200 - -[[Server]] -Pattern = "GET /api/2.0/fs/files/Volumes/main/default/data/remote.txt" -Response.StatusCode = 200 -Response.Body = "content from volume" diff --git a/libs/testserver/fake_workspace.go b/libs/testserver/fake_workspace.go index 8cb87dc9cf..58018bfe9a 100644 --- a/libs/testserver/fake_workspace.go +++ b/libs/testserver/fake_workspace.go @@ -384,6 +384,30 @@ func (s *FakeWorkspace) WorkspaceFilesExportFile(path string) []byte { return s.files[path].Data } +// FileExists checks if a file exists at the given path. +func (s *FakeWorkspace) FileExists(path string) bool { + if !strings.HasPrefix(path, "/") { + path = "/" + path + } + + defer s.LockUnlock()() + + _, exists := s.files[path] + return exists +} + +// DirectoryExists checks if a directory exists at the given path. +func (s *FakeWorkspace) DirectoryExists(path string) bool { + if !strings.HasPrefix(path, "/") { + path = "/" + path + } + + defer s.LockUnlock()() + + _, exists := s.directories[path] + return exists +} + // jsonConvert saves input to a value pointed by output func jsonConvert(input, output any) error { writer := new(bytes.Buffer) diff --git a/libs/testserver/handlers.go b/libs/testserver/handlers.go index c46457ba3f..9f4d6fcabf 100644 --- a/libs/testserver/handlers.go +++ b/libs/testserver/handlers.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "net/http" + "strings" "github.com/databricks/databricks-sdk-go/service/catalog" "github.com/databricks/databricks-sdk-go/service/compute" @@ -145,9 +146,61 @@ func AddDefaultHandlers(server *Server) { }) server.Handle("HEAD", "/api/2.0/fs/directories/{path:.*}", func(req Request) any { - return Response{ - Body: "dir path: " + req.Vars["dir_path"], + path := req.Vars["path"] + if !strings.HasPrefix(path, "/") { + path = "/" + path + } + + if req.Workspace.FileExists(path) { + return Response{StatusCode: 404} + } + if req.Workspace.DirectoryExists(path) { + return Response{StatusCode: 200} + } + return Response{StatusCode: 404} + }) + + server.Handle("HEAD", "/api/2.0/fs/files/{path:.*}", func(req Request) any { + path := req.Vars["path"] + if req.Workspace.FileExists(path) { + return Response{StatusCode: 200} } + return Response{StatusCode: 404} + }) + + server.Handle("PUT", "/api/2.0/fs/directories/{path:.*}", func(req Request) any { + path := req.Vars["path"] + // Normalize path to start with "/". + if !strings.HasPrefix(path, "/") { + path = "/" + path + } + defer req.Workspace.LockUnlock()() + + // Create this directory. + req.Workspace.directories[path] = workspace.ObjectInfo{ + ObjectType: "DIRECTORY", + Path: path, + ObjectId: nextID(), + } + + // Also create all parent directories. + for dir := path; dir != "/" && dir != ""; { + dir = strings.TrimSuffix(dir, "/") + idx := strings.LastIndex(dir, "/") + if idx <= 0 { + break + } + dir = dir[:idx] + if _, exists := req.Workspace.directories[dir]; !exists { + req.Workspace.directories[dir] = workspace.ObjectInfo{ + ObjectType: "DIRECTORY", + Path: dir, + ObjectId: nextID(), + } + } + } + + return Response{} }) server.Handle("PUT", "/api/2.0/fs/files/{path:.*}", func(req Request) any { @@ -156,6 +209,20 @@ func AddDefaultHandlers(server *Server) { return req.Workspace.WorkspaceFilesImportFile(path, req.Body, overwrite) }) + server.Handle("GET", "/api/2.0/fs/files/{path:.*}", func(req Request) any { + path := req.Vars["path"] + data := req.Workspace.WorkspaceFilesExportFile(path) + if data == nil { + return Response{ + StatusCode: 404, + Body: map[string]string{ + "message": "file does not exist", + }, + } + } + return data + }) + server.Handle("GET", "/api/2.1/unity-catalog/current-metastore-assignment", func(req Request) any { return TestMetastore }) From 8d0ede6d70f51930a77ca722858644daa4d0e568 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 9 Jan 2026 08:50:25 +0000 Subject: [PATCH 12/18] Enable cloud testing --- acceptance/cmd/fs/cp/dir-to-dir/out.test.toml | 2 +- acceptance/cmd/fs/cp/dir-to-dir/test.toml | 2 +- .../cmd/fs/cp/file-to-dir/out.test.toml | 2 +- acceptance/cmd/fs/cp/file-to-dir/test.toml | 2 +- .../cmd/fs/cp/file-to-file/out.test.toml | 2 +- acceptance/cmd/fs/cp/file-to-file/test.toml | 2 +- libs/testserver/handlers.go | 26 +++++-------------- 7 files changed, 12 insertions(+), 26 deletions(-) diff --git a/acceptance/cmd/fs/cp/dir-to-dir/out.test.toml b/acceptance/cmd/fs/cp/dir-to-dir/out.test.toml index d560f1de04..01ed6822af 100644 --- a/acceptance/cmd/fs/cp/dir-to-dir/out.test.toml +++ b/acceptance/cmd/fs/cp/dir-to-dir/out.test.toml @@ -1,5 +1,5 @@ Local = true -Cloud = false +Cloud = true [EnvMatrix] DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/cmd/fs/cp/dir-to-dir/test.toml b/acceptance/cmd/fs/cp/dir-to-dir/test.toml index 7d36fb9dc1..0e8c8a3840 100644 --- a/acceptance/cmd/fs/cp/dir-to-dir/test.toml +++ b/acceptance/cmd/fs/cp/dir-to-dir/test.toml @@ -1,2 +1,2 @@ Local = true -Cloud = false +Cloud = true diff --git a/acceptance/cmd/fs/cp/file-to-dir/out.test.toml b/acceptance/cmd/fs/cp/file-to-dir/out.test.toml index d560f1de04..01ed6822af 100644 --- a/acceptance/cmd/fs/cp/file-to-dir/out.test.toml +++ b/acceptance/cmd/fs/cp/file-to-dir/out.test.toml @@ -1,5 +1,5 @@ Local = true -Cloud = false +Cloud = true [EnvMatrix] DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/cmd/fs/cp/file-to-dir/test.toml b/acceptance/cmd/fs/cp/file-to-dir/test.toml index 7d36fb9dc1..0e8c8a3840 100644 --- a/acceptance/cmd/fs/cp/file-to-dir/test.toml +++ b/acceptance/cmd/fs/cp/file-to-dir/test.toml @@ -1,2 +1,2 @@ Local = true -Cloud = false +Cloud = true diff --git a/acceptance/cmd/fs/cp/file-to-file/out.test.toml b/acceptance/cmd/fs/cp/file-to-file/out.test.toml index d560f1de04..01ed6822af 100644 --- a/acceptance/cmd/fs/cp/file-to-file/out.test.toml +++ b/acceptance/cmd/fs/cp/file-to-file/out.test.toml @@ -1,5 +1,5 @@ Local = true -Cloud = false +Cloud = true [EnvMatrix] DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/cmd/fs/cp/file-to-file/test.toml b/acceptance/cmd/fs/cp/file-to-file/test.toml index 7d36fb9dc1..0e8c8a3840 100644 --- a/acceptance/cmd/fs/cp/file-to-file/test.toml +++ b/acceptance/cmd/fs/cp/file-to-file/test.toml @@ -1,2 +1,2 @@ Local = true -Cloud = false +Cloud = true diff --git a/libs/testserver/handlers.go b/libs/testserver/handlers.go index 9f4d6fcabf..175a629155 100644 --- a/libs/testserver/handlers.go +++ b/libs/testserver/handlers.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "net/http" + "path" "strings" "github.com/databricks/databricks-sdk-go/service/catalog" @@ -169,28 +170,14 @@ func AddDefaultHandlers(server *Server) { }) server.Handle("PUT", "/api/2.0/fs/directories/{path:.*}", func(req Request) any { - path := req.Vars["path"] - // Normalize path to start with "/". - if !strings.HasPrefix(path, "/") { - path = "/" + path + dirPath := req.Vars["path"] + if !strings.HasPrefix(dirPath, "/") { + dirPath = "/" + dirPath } defer req.Workspace.LockUnlock()() - // Create this directory. - req.Workspace.directories[path] = workspace.ObjectInfo{ - ObjectType: "DIRECTORY", - Path: path, - ObjectId: nextID(), - } - - // Also create all parent directories. - for dir := path; dir != "/" && dir != ""; { - dir = strings.TrimSuffix(dir, "/") - idx := strings.LastIndex(dir, "/") - if idx <= 0 { - break - } - dir = dir[:idx] + // Create directory and all parents. + for dir := dirPath; dir != "/" && dir != ""; dir = path.Dir(dir) { if _, exists := req.Workspace.directories[dir]; !exists { req.Workspace.directories[dir] = workspace.ObjectInfo{ ObjectType: "DIRECTORY", @@ -199,7 +186,6 @@ func AddDefaultHandlers(server *Server) { } } } - return Response{} }) From 4bbbe571e0994d52e38f3b0579ef10937eee9505 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 9 Jan 2026 09:14:23 +0000 Subject: [PATCH 13/18] Fix whitespace: add trailing newlines to acceptance test files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- acceptance/cmd/fs/cp/dir-to-dir/localdir/file1.txt | 2 +- acceptance/cmd/fs/cp/dir-to-dir/localdir/file2.txt | 2 +- acceptance/cmd/fs/cp/file-to-dir/local.txt | 2 +- acceptance/cmd/fs/cp/file-to-file/local.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/acceptance/cmd/fs/cp/dir-to-dir/localdir/file1.txt b/acceptance/cmd/fs/cp/dir-to-dir/localdir/file1.txt index 2e80f50e92..d9039017ab 100644 --- a/acceptance/cmd/fs/cp/dir-to-dir/localdir/file1.txt +++ b/acceptance/cmd/fs/cp/dir-to-dir/localdir/file1.txt @@ -1 +1 @@ -file1 content \ No newline at end of file +file1 content diff --git a/acceptance/cmd/fs/cp/dir-to-dir/localdir/file2.txt b/acceptance/cmd/fs/cp/dir-to-dir/localdir/file2.txt index 79cd8cc836..f3c77b12c6 100644 --- a/acceptance/cmd/fs/cp/dir-to-dir/localdir/file2.txt +++ b/acceptance/cmd/fs/cp/dir-to-dir/localdir/file2.txt @@ -1 +1 @@ -file2 content \ No newline at end of file +file2 content diff --git a/acceptance/cmd/fs/cp/file-to-dir/local.txt b/acceptance/cmd/fs/cp/file-to-dir/local.txt index bc7774a7b1..a042389697 100644 --- a/acceptance/cmd/fs/cp/file-to-dir/local.txt +++ b/acceptance/cmd/fs/cp/file-to-dir/local.txt @@ -1 +1 @@ -hello world! \ No newline at end of file +hello world! diff --git a/acceptance/cmd/fs/cp/file-to-file/local.txt b/acceptance/cmd/fs/cp/file-to-file/local.txt index bc7774a7b1..a042389697 100644 --- a/acceptance/cmd/fs/cp/file-to-file/local.txt +++ b/acceptance/cmd/fs/cp/file-to-file/local.txt @@ -1 +1 @@ -hello world! \ No newline at end of file +hello world! From 1f331ff6e77e02e7ecc4ca7cbb928642c0c809fc Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 9 Jan 2026 09:33:00 +0000 Subject: [PATCH 14/18] Fix acceptance test output formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing blank lines and trailing newlines in fs cp test outputs to match actual command output format. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- acceptance/cmd/fs/cp/dir-to-dir/output.txt | 3 ++- acceptance/cmd/fs/cp/file-to-dir/output.txt | 2 +- acceptance/cmd/fs/cp/file-to-file/output.txt | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/acceptance/cmd/fs/cp/dir-to-dir/output.txt b/acceptance/cmd/fs/cp/dir-to-dir/output.txt index 47387471a8..56426bf8e0 100644 --- a/acceptance/cmd/fs/cp/dir-to-dir/output.txt +++ b/acceptance/cmd/fs/cp/dir-to-dir/output.txt @@ -5,5 +5,6 @@ localdir/file2.txt -> dbfs:/Volumes/main/default/data/uploaded-dir/file2.txt >>> [CLI] fs cat dbfs:/Volumes/main/default/data/uploaded-dir/file1.txt file1 content + >>> [CLI] fs cat dbfs:/Volumes/main/default/data/uploaded-dir/file2.txt -file2 content \ No newline at end of file +file2 content diff --git a/acceptance/cmd/fs/cp/file-to-dir/output.txt b/acceptance/cmd/fs/cp/file-to-dir/output.txt index 3247705e69..3be1171e97 100644 --- a/acceptance/cmd/fs/cp/file-to-dir/output.txt +++ b/acceptance/cmd/fs/cp/file-to-dir/output.txt @@ -3,4 +3,4 @@ local.txt -> dbfs:/Volumes/main/default/data/mydir/local.txt >>> [CLI] fs cat dbfs:/Volumes/main/default/data/mydir/local.txt -hello world! \ No newline at end of file +hello world! diff --git a/acceptance/cmd/fs/cp/file-to-file/output.txt b/acceptance/cmd/fs/cp/file-to-file/output.txt index 60ca172f6e..175dd73ecb 100644 --- a/acceptance/cmd/fs/cp/file-to-file/output.txt +++ b/acceptance/cmd/fs/cp/file-to-file/output.txt @@ -4,8 +4,9 @@ local.txt -> dbfs:/Volumes/main/default/data/uploaded.txt >>> [CLI] fs cat dbfs:/Volumes/main/default/data/uploaded.txt hello world! + >>> [CLI] fs cp dbfs:/Volumes/main/default/data/uploaded.txt downloaded.txt dbfs:/Volumes/main/default/data/uploaded.txt -> downloaded.txt >>> cat downloaded.txt -hello world! \ No newline at end of file +hello world! From f8bf48a078a18a0603eea0a865039652a36ad5b6 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 9 Jan 2026 09:59:28 +0000 Subject: [PATCH 15/18] Fix test server HEAD handler for volumes with file extension heuristic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HEAD handler for /api/2.0/fs/directories now uses a simple heuristic: if a path has a file extension (e.g., .txt, .json), it's assumed to be a file, not a directory. This avoids the need for complex state tracking or per-test Server overrides while handling the common case correctly. Also regenerated acceptance test output files to include proper blank lines between command outputs. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- libs/testserver/handlers.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/libs/testserver/handlers.go b/libs/testserver/handlers.go index 175a629155..9fb1408e7b 100644 --- a/libs/testserver/handlers.go +++ b/libs/testserver/handlers.go @@ -152,13 +152,22 @@ func AddDefaultHandlers(server *Server) { path = "/" + path } + // Heuristic: if path has a file extension (last . after last /), assume it's not a directory + lastSlash := strings.LastIndexAny(path, "/\\") + lastDot := strings.LastIndex(path, ".") + if lastDot > lastSlash && lastDot > 0 { + // Has a file extension like .txt, .json, etc. + return Response{StatusCode: 404} + } + if req.Workspace.FileExists(path) { return Response{StatusCode: 404} } if req.Workspace.DirectoryExists(path) { return Response{StatusCode: 200} } - return Response{StatusCode: 404} + // For volumes or other paths, assume it could be a directory + return Response{StatusCode: 200} }) server.Handle("HEAD", "/api/2.0/fs/files/{path:.*}", func(req Request) any { From 22da2b322035d355ecb89b16f7904390524bb939 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 9 Jan 2026 10:41:14 +0000 Subject: [PATCH 16/18] Improve documentation for test server file extension heuristic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added comprehensive documentation explaining: - WHY: test server doesn't track UC Volumes state - WHAT: the problem of checking paths before they exist - HOW: file extension heuristic solves the common case - Assumptions and limitations Also simplified code by inlining lastSlash comparison and ensured all comments start with capital letters and end with periods. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- libs/testserver/handlers.go | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/libs/testserver/handlers.go b/libs/testserver/handlers.go index 9fb1408e7b..baf5fd41a1 100644 --- a/libs/testserver/handlers.go +++ b/libs/testserver/handlers.go @@ -152,21 +152,34 @@ func AddDefaultHandlers(server *Server) { path = "/" + path } - // Heuristic: if path has a file extension (last . after last /), assume it's not a directory - lastSlash := strings.LastIndexAny(path, "/\\") + // The test server doesn't track Unity Catalog Volumes file/directory state. + // When the CLI checks if a path like "/Volumes/.../file.txt" is a directory + // (via HEAD request) before creating it (via PUT request), the path doesn't + // exist yet, so we have no state to check. We must answer based on the path + // string alone. + // + // Use a simple heuristic: if the path has a file extension (e.g., .txt), + // assume it's a file, not a directory. This handles the common case where + // files have extensions and directories don't. + // + // Assumption: paths with extensions like .txt, .json, .yaml are files. + // Limitation: this doesn't handle extensionless files or directories with + // dots in their names. For such cases, use test-specific Server overrides + // in test.toml files. lastDot := strings.LastIndex(path, ".") - if lastDot > lastSlash && lastDot > 0 { - // Has a file extension like .txt, .json, etc. + if lastDot > strings.LastIndexAny(path, "/\\") { return Response{StatusCode: 404} } + // Check tracked workspace files and directories. if req.Workspace.FileExists(path) { return Response{StatusCode: 404} } if req.Workspace.DirectoryExists(path) { return Response{StatusCode: 200} } - // For volumes or other paths, assume it could be a directory + + // For untracked paths (e.g., Unity Catalog Volumes), assume directory. return Response{StatusCode: 200} }) @@ -183,9 +196,10 @@ func AddDefaultHandlers(server *Server) { if !strings.HasPrefix(dirPath, "/") { dirPath = "/" + dirPath } + defer req.Workspace.LockUnlock()() - // Create directory and all parents. + // Create directory and all parent directories. for dir := dirPath; dir != "/" && dir != ""; dir = path.Dir(dir) { if _, exists := req.Workspace.directories[dir]; !exists { req.Workspace.directories[dir] = workspace.ObjectInfo{ From 674fa2ec12ff83b194d66661c59d42f341979794 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 9 Jan 2026 10:41:14 +0000 Subject: [PATCH 17/18] Improve test server HEAD handler with volume-specific heuristic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The file extension heuristic is now only applied to Unity Catalog Volumes paths (/Volumes/...), which makes semantic sense since: 1. Workspace files/directories: use tracked state (correct) 2. Volume paths: use heuristic (necessary - we don't track volume state) 3. Non-existent workspace paths: return 404 (correct) Previously, the handler returned 200 for ALL non-existent paths, which was semantically incorrect. Now it only assumes directories exist for volume paths where we genuinely don't have state to check. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- libs/testserver/handlers.go | 38 ++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/libs/testserver/handlers.go b/libs/testserver/handlers.go index 9fb1408e7b..5a5327f5a4 100644 --- a/libs/testserver/handlers.go +++ b/libs/testserver/handlers.go @@ -148,26 +148,41 @@ func AddDefaultHandlers(server *Server) { server.Handle("HEAD", "/api/2.0/fs/directories/{path:.*}", func(req Request) any { path := req.Vars["path"] + // Normalize path to always start with "/". if !strings.HasPrefix(path, "/") { path = "/" + path } - // Heuristic: if path has a file extension (last . after last /), assume it's not a directory - lastSlash := strings.LastIndexAny(path, "/\\") - lastDot := strings.LastIndex(path, ".") - if lastDot > lastSlash && lastDot > 0 { - // Has a file extension like .txt, .json, etc. - return Response{StatusCode: 404} - } - + // Check tracked workspace files and directories first. if req.Workspace.FileExists(path) { return Response{StatusCode: 404} } if req.Workspace.DirectoryExists(path) { return Response{StatusCode: 200} } - // For volumes or other paths, assume it could be a directory - return Response{StatusCode: 200} + + // Unity Catalog Volumes paths (/Volumes/...) are not tracked by the test server. + // When the CLI checks if a volume path is a directory (via HEAD request) before + // creating it (via PUT request), the path doesn't exist yet in our state. + // + // Use a simple heuristic: if the path has a file extension (e.g., .txt), + // assume it's a file, not a directory. This handles the common case where + // files have extensions and directories don't. + // + // Assumption: paths with extensions like .txt, .json, .yaml are files. + // Limitation: this doesn't handle extensionless files or directories with + // dots in their names. For such cases, use test-specific Server overrides + // in test.toml files. + if strings.HasPrefix(path, "/Volumes/") { + lastDot := strings.LastIndex(path, ".") + if lastDot > strings.LastIndexAny(path, "/\\") { + return Response{StatusCode: 404} + } + return Response{StatusCode: 200} + } + + // Non-existent workspace path. + return Response{StatusCode: 404} }) server.Handle("HEAD", "/api/2.0/fs/files/{path:.*}", func(req Request) any { @@ -183,9 +198,10 @@ func AddDefaultHandlers(server *Server) { if !strings.HasPrefix(dirPath, "/") { dirPath = "/" + dirPath } + defer req.Workspace.LockUnlock()() - // Create directory and all parents. + // Create directory and all parent directories. for dir := dirPath; dir != "/" && dir != ""; dir = path.Dir(dir) { if _, exists := req.Workspace.directories[dir]; !exists { req.Workspace.directories[dir] = workspace.ObjectInfo{ From fdfc0e12c913cc23e152161f40ef1b04ff2eb54c Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 9 Jan 2026 10:56:54 +0000 Subject: [PATCH 18/18] Improve test server HEAD handler for Unity Catalog Volumes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply file extension heuristic only to /Volumes/ paths where state isn't tracked. Return correct 404 for non-existent workspace paths. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- libs/testserver/handlers.go | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/libs/testserver/handlers.go b/libs/testserver/handlers.go index 5a5327f5a4..a96274df39 100644 --- a/libs/testserver/handlers.go +++ b/libs/testserver/handlers.go @@ -161,18 +161,14 @@ func AddDefaultHandlers(server *Server) { return Response{StatusCode: 200} } - // Unity Catalog Volumes paths (/Volumes/...) are not tracked by the test server. - // When the CLI checks if a volume path is a directory (via HEAD request) before - // creating it (via PUT request), the path doesn't exist yet in our state. + // Unity Catalog Volumes paths (/Volumes/...) are not tracked by the + // test server. When the CLI checks if a volume path is a directory + // (via HEAD request) before creating it (via PUT request), the path + // doesn't exist yet in our state. // - // Use a simple heuristic: if the path has a file extension (e.g., .txt), - // assume it's a file, not a directory. This handles the common case where + // Use a simple heuristic: if the path has a file extension then assume + // it's a file, not a directory. This handles the common case where // files have extensions and directories don't. - // - // Assumption: paths with extensions like .txt, .json, .yaml are files. - // Limitation: this doesn't handle extensionless files or directories with - // dots in their names. For such cases, use test-specific Server overrides - // in test.toml files. if strings.HasPrefix(path, "/Volumes/") { lastDot := strings.LastIndex(path, ".") if lastDot > strings.LastIndexAny(path, "/\\") {