diff --git a/Taskfile.yml b/Taskfile.yml index d72140290e..32cb14d0c4 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -4,7 +4,7 @@ vars: # Absolute path so tasks with `dir:` (lint-go-tools, lint-go-codegen) can use it. GO_TOOL: go tool -modfile={{.ROOT_DIR}}/tools/go.mod EXE_EXT: '{{if eq OS "windows"}}.exe{{end}}' - TEST_PACKAGES: ./acceptance/internal ./libs/... ./internal/... ./cmd/... ./bundle/... ./experimental/ssh/... . + TEST_PACKAGES: ./acceptance/internal ./libs/... ./internal/... ./cmd/... ./bundle/... ./experimental/air/... ./experimental/ssh/... . ACCEPTANCE_TEST_FILTER: "" # Single brace-expansion glob covering every //go:embed target in the repo, # computed by grepping `//go:embed` directives. Evaluated lazily by Task so diff --git a/acceptance/experimental/air/help/out.test.toml b/acceptance/experimental/air/help/out.test.toml new file mode 100644 index 0000000000..d6187dcb04 --- /dev/null +++ b/acceptance/experimental/air/help/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = [] diff --git a/acceptance/experimental/air/help/output.txt b/acceptance/experimental/air/help/output.txt new file mode 100644 index 0000000000..3a0f86e164 --- /dev/null +++ b/acceptance/experimental/air/help/output.txt @@ -0,0 +1,29 @@ + +=== help +>>> [CLI] experimental air --help +Run and manage AI runtime training workloads on Databricks serverless GPU compute. + +This command set is the Go port of the standalone Python "air" CLI. It is +experimental and may change in future versions. 
+ +Usage: + databricks experimental air [command] + +Available Commands: + cancel Cancel one or more runs + get Show details for a run + list List recent runs + logs Stream or fetch logs for a run + register-image Mirror a Docker image into the workspace registry + run Submit a training workload from a YAML config + +Flags: + -h, --help help for air + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + +Use "databricks experimental air [command] --help" for more information about a command. diff --git a/acceptance/experimental/air/help/script b/acceptance/experimental/air/help/script new file mode 100644 index 0000000000..cd67a6fc1b --- /dev/null +++ b/acceptance/experimental/air/help/script @@ -0,0 +1,5 @@ +# Pin the command tree so any change to a subcommand or its short description +# shows up as a diff here. + +title "help" +trace $CLI experimental air --help diff --git a/acceptance/experimental/air/help/test.toml b/acceptance/experimental/air/help/test.toml new file mode 100644 index 0000000000..49709b578e --- /dev/null +++ b/acceptance/experimental/air/help/test.toml @@ -0,0 +1,3 @@ +# --help prints without authenticating, so no server stubs are needed. 
+[EnvMatrix] +DATABRICKS_BUNDLE_ENGINE = [] diff --git a/acceptance/experimental/air/unimplemented/out.test.toml b/acceptance/experimental/air/unimplemented/out.test.toml new file mode 100644 index 0000000000..d6187dcb04 --- /dev/null +++ b/acceptance/experimental/air/unimplemented/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = [] diff --git a/acceptance/experimental/air/unimplemented/output.txt b/acceptance/experimental/air/unimplemented/output.txt new file mode 100644 index 0000000000..4a07a38a37 --- /dev/null +++ b/acceptance/experimental/air/unimplemented/output.txt @@ -0,0 +1,36 @@ + +=== run +>>> [CLI] experimental air run +Error: `air run` is not implemented yet + +Exit code: 1 + +=== get +>>> [CLI] experimental air get 123 +Error: `air get` is not implemented yet + +Exit code: 1 + +=== list +>>> [CLI] experimental air list +Error: `air list` is not implemented yet + +Exit code: 1 + +=== logs +>>> [CLI] experimental air logs 123 +Error: `air logs` is not implemented yet + +Exit code: 1 + +=== cancel +>>> [CLI] experimental air cancel 123 +Error: `air cancel` is not implemented yet + +Exit code: 1 + +=== register-image +>>> [CLI] experimental air register-image my-image:latest +Error: `air register-image` is not implemented yet + +Exit code: 1 diff --git a/acceptance/experimental/air/unimplemented/script b/acceptance/experimental/air/unimplemented/script new file mode 100644 index 0000000000..2ed885c0e6 --- /dev/null +++ b/acceptance/experimental/air/unimplemented/script @@ -0,0 +1,19 @@ +# Each stub must fail with "not implemented"; errcode records the exit code. 
+ +title "run" +errcode trace $CLI experimental air run + +title "get" +errcode trace $CLI experimental air get 123 + +title "list" +errcode trace $CLI experimental air list + +title "logs" +errcode trace $CLI experimental air logs 123 + +title "cancel" +errcode trace $CLI experimental air cancel 123 + +title "register-image" +errcode trace $CLI experimental air register-image my-image:latest diff --git a/acceptance/experimental/air/unimplemented/test.toml b/acceptance/experimental/air/unimplemented/test.toml new file mode 100644 index 0000000000..c233c30a86 --- /dev/null +++ b/acceptance/experimental/air/unimplemented/test.toml @@ -0,0 +1,3 @@ +# Stubs fail locally before any API call, so no server stubs needed. +[EnvMatrix] +DATABRICKS_BUNDLE_ENGINE = [] diff --git a/cmd/experimental/experimental.go b/cmd/experimental/experimental.go index 8d9827c5c9..d87c893abc 100644 --- a/cmd/experimental/experimental.go +++ b/cmd/experimental/experimental.go @@ -1,6 +1,7 @@ package experimental import ( + aircmd "github.com/databricks/cli/experimental/air/cmd" aitoolscmd "github.com/databricks/cli/experimental/aitools/cmd" geniecmd "github.com/databricks/cli/experimental/genie/cmd" postgrescmd "github.com/databricks/cli/experimental/postgres/cmd" @@ -22,6 +23,7 @@ These commands provide early access to new features that are still under development. They may change or be removed in future versions without notice.`, } + cmd.AddCommand(aircmd.New()) cmd.AddCommand(aitoolscmd.NewAitoolsCmd()) cmd.AddCommand(geniecmd.NewGenieCmd()) cmd.AddCommand(postgrescmd.New()) diff --git a/experimental/air/cmd/air.go b/experimental/air/cmd/air.go new file mode 100644 index 0000000000..81ffb2dd34 --- /dev/null +++ b/experimental/air/cmd/air.go @@ -0,0 +1,36 @@ +package aircmd + +import ( + "fmt" + + "github.com/spf13/cobra" +) + +// New returns the root command for the experimental AI runtime CLI. 
+// +// Milestone 0: scaffolds the command group with every subcommand registered as a +// stub (not yet implemented), pending the port from the Python `air` CLI. +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "air", + Short: "Run and manage AI runtime training workloads", + Long: `Run and manage AI runtime training workloads on Databricks serverless GPU compute. + +This command set is the Go port of the standalone Python "air" CLI. It is +experimental and may change in future versions.`, + } + + cmd.AddCommand(newRunCommand()) + cmd.AddCommand(newGetCommand()) + cmd.AddCommand(newListCommand()) + cmd.AddCommand(newLogsCommand()) + cmd.AddCommand(newCancelCommand()) + cmd.AddCommand(newRegisterImageCommand()) + + return cmd +} + +// notImplemented returns the placeholder error used by milestone-0 stubs; name is the subcommand name shown in the message. +func notImplemented(name string) error { + return fmt.Errorf("`air %s` is not implemented yet", name) +} diff --git a/experimental/air/cmd/air_test.go b/experimental/air/cmd/air_test.go new file mode 100644 index 0000000000..7efac253a2 --- /dev/null +++ b/experimental/air/cmd/air_test.go @@ -0,0 +1,22 @@ +package aircmd + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// TestNewRegistersAllSubcommands asserts the `air` command wires up exactly the +// expected subcommands, so none is accidentally dropped from or added to New. 
+func TestNewRegistersAllSubcommands(t *testing.T) { + registered := make(map[string]bool) + for _, c := range New().Commands() { + registered[c.Name()] = true + } + + want := []string{"run", "get", "list", "logs", "cancel", "register-image"} + for _, name := range want { + assert.True(t, registered[name], "subcommand %q is not registered", name) + } + assert.Len(t, registered, len(want), "unexpected number of subcommands") +} diff --git a/experimental/air/cmd/cancel.go b/experimental/air/cmd/cancel.go new file mode 100644 index 0000000000..ad7fffc712 --- /dev/null +++ b/experimental/air/cmd/cancel.go @@ -0,0 +1,39 @@ +package aircmd + +import ( + "github.com/databricks/cli/cmd/root" + "github.com/spf13/cobra" +) + +func newCancelCommand() *cobra.Command { + var ( + all bool + yes bool + ) + + cmd := &cobra.Command{ + Use: "cancel [RUN_ID...]", + Short: "Cancel one or more runs", + Long: `Cancel one or more runs by ID, or cancel all of your active runs with --all.`, + RunE: func(cmd *cobra.Command, args []string) error { + return notImplemented("cancel") + }, + } + + cmd.Flags().BoolVar(&all, "all", false, "Cancel all of your active runs") + cmd.Flags().BoolVarP(&yes, "yes", "y", false, "Skip the confirmation prompt") + + // Require exactly one of: one or more RUN_IDs, or --all. Cobra parses flags + // before running this validator, so `all` reflects the user's input. 
+ cmd.Args = func(cmd *cobra.Command, args []string) error { + switch { + case all && len(args) > 0: + return &root.InvalidArgsError{Command: cmd, Message: "cannot combine RUN_ID arguments with --all"} + case !all && len(args) == 0: + return &root.InvalidArgsError{Command: cmd, Message: "provide at least one RUN_ID, or use --all"} + } + return nil + } + + return cmd +} diff --git a/experimental/air/cmd/get.go b/experimental/air/cmd/get.go new file mode 100644 index 0000000000..0ab0b8226b --- /dev/null +++ b/experimental/air/cmd/get.go @@ -0,0 +1,19 @@ +package aircmd + +import ( + "github.com/databricks/cli/cmd/root" + "github.com/spf13/cobra" +) + +func newGetCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "get RUN_ID", + Args: root.ExactArgs(1), + Short: "Show details for a run", + RunE: func(cmd *cobra.Command, args []string) error { + return notImplemented("get") + }, + } + + return cmd +} diff --git a/experimental/air/cmd/list.go b/experimental/air/cmd/list.go new file mode 100644 index 0000000000..bf24cff9b2 --- /dev/null +++ b/experimental/air/cmd/list.go @@ -0,0 +1,31 @@ +package aircmd + +import ( + "github.com/databricks/cli/cmd/root" + "github.com/spf13/cobra" +) + +func newListCommand() *cobra.Command { + var ( + limit int + active bool + allUsers bool + filters []string + ) + + cmd := &cobra.Command{ + Use: "list", + Args: root.NoArgs, + Short: "List recent runs", + RunE: func(cmd *cobra.Command, args []string) error { + return notImplemented("list") + }, + } + + cmd.Flags().IntVar(&limit, "limit", 20, "Maximum number of runs to show") + cmd.Flags().BoolVar(&active, "active", false, "Show only active runs") + cmd.Flags().BoolVar(&allUsers, "all-users", false, "Show runs from all users") + cmd.Flags().StringArrayVar(&filters, "filter", nil, "Filter runs, e.g. 
experiment=foo* (repeatable)") + + return cmd +} diff --git a/experimental/air/cmd/logs.go b/experimental/air/cmd/logs.go new file mode 100644 index 0000000000..4dbbe41c27 --- /dev/null +++ b/experimental/air/cmd/logs.go @@ -0,0 +1,34 @@ +package aircmd + +import ( + "github.com/databricks/cli/cmd/root" + "github.com/spf13/cobra" +) + +func newLogsCommand() *cobra.Command { + var ( + node int + lines int + retry int + downloadTo string + review bool + ) + + cmd := &cobra.Command{ + Use: "logs RUN_ID", + Args: root.ExactArgs(1), + Short: "Stream or fetch logs for a run", + Long: `Stream logs from an active run, or fetch logs from a completed run.`, + RunE: func(cmd *cobra.Command, args []string) error { + return notImplemented("logs") + }, + } + + cmd.Flags().IntVar(&node, "node", 0, "Fetch logs from this node") + cmd.Flags().IntVar(&lines, "lines", 10000, "For completed runs, print the last N lines") + cmd.Flags().IntVar(&retry, "retry", -1, "View logs from a specific retry attempt; -1 means latest") + cmd.Flags().StringVar(&downloadTo, "download-to", "", "Download all logs to this directory instead of printing") + cmd.Flags().BoolVar(&review, "review", false, "Download logs from all nodes and filter for error signatures") + + return cmd +} diff --git a/experimental/air/cmd/register_image.go b/experimental/air/cmd/register_image.go new file mode 100644 index 0000000000..a5be3df408 --- /dev/null +++ b/experimental/air/cmd/register_image.go @@ -0,0 +1,33 @@ +package aircmd + +import ( + "github.com/databricks/cli/cmd/root" + "github.com/spf13/cobra" +) + +func newRegisterImageCommand() *cobra.Command { + var ( + scope string + key string + interactiveAuth bool + tagPolicy string + timeoutMinutes int + ) + + cmd := &cobra.Command{ + Use: "register-image IMAGE_URL", + Args: root.ExactArgs(1), + Short: "Mirror a Docker image into the workspace registry", + RunE: func(cmd *cobra.Command, args []string) error { + return notImplemented("register-image") + }, + } + + 
cmd.Flags().StringVar(&scope, "scope", "", "Databricks secret scope holding registry credentials") + cmd.Flags().StringVar(&key, "key", "", "Databricks secret key holding registry credentials") + cmd.Flags().BoolVar(&interactiveAuth, "interactive-authenticate", false, "Prompt for registry credentials and store them as a secret") + cmd.Flags().StringVar(&tagPolicy, "tag-policy", "auto", "Image resolution policy: auto or latest") + cmd.Flags().IntVar(&timeoutMinutes, "timeout-minutes", 60, "Timeout to wait for the image to become available") + + return cmd +} diff --git a/experimental/air/cmd/run.go b/experimental/air/cmd/run.go new file mode 100644 index 0000000000..0bc3d1fd94 --- /dev/null +++ b/experimental/air/cmd/run.go @@ -0,0 +1,36 @@ +package aircmd + +import ( + "github.com/databricks/cli/cmd/root" + "github.com/spf13/cobra" +) + +func newRunCommand() *cobra.Command { + var ( + file string + watch bool + overrides []string + dryRun bool + idempotencyKey string + ) + + cmd := &cobra.Command{ + Use: "run", + Args: root.NoArgs, + Short: "Submit a training workload from a YAML config", + Long: `Submit a training workload to Databricks serverless GPU compute. + +The workload is described by a YAML config file (see --file).`, + RunE: func(cmd *cobra.Command, args []string) error { + return notImplemented("run") + }, + } + + cmd.Flags().StringVarP(&file, "file", "f", "", "Path to the workload YAML config") + cmd.Flags().BoolVar(&watch, "watch", false, "Stream logs until the run completes") + cmd.Flags().StringArrayVar(&overrides, "override", nil, "Override a YAML field, e.g. 
compute.num_accelerators=8 (repeatable)") + cmd.Flags().BoolVar(&dryRun, "dry-run", false, "Validate the config without submitting") + cmd.Flags().StringVar(&idempotencyKey, "idempotency-key", "", "Return the existing run if this key was already used") + + return cmd +} diff --git a/experimental/air/cmd/stubs_test.go b/experimental/air/cmd/stubs_test.go new file mode 100644 index 0000000000..a6e24177f3 --- /dev/null +++ b/experimental/air/cmd/stubs_test.go @@ -0,0 +1,31 @@ +package aircmd + +import ( + "fmt" + "testing" + + "github.com/spf13/cobra" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestStubCommandsReturnNotImplemented asserts each unimplemented subcommand defines +// RunE and fails with its exact "not implemented" error. Remove a command from the stubs map once it is implemented. +func TestStubCommandsReturnNotImplemented(t *testing.T) { + stubs := map[string]*cobra.Command{ + "run": newRunCommand(), + "get": newGetCommand(), + "list": newListCommand(), + "logs": newLogsCommand(), + "cancel": newCancelCommand(), + "register-image": newRegisterImageCommand(), + } + + for name, cmd := range stubs { + t.Run(name, func(t *testing.T) { + require.NotNil(t, cmd.RunE, "command should define RunE") + err := cmd.RunE(cmd, nil) + assert.EqualError(t, err, fmt.Sprintf("`air %s` is not implemented yet", name)) + }) + } +}