diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index fe3511e4b21..6c9187806c6 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -6,6 +6,7 @@ ### CLI * Show a once-per-day notice after a command when a newer CLI release is available, with a link to the release and the upgrade command for the detected install method. Suppressed for non-interactive/CI runs, JSON output, the Databricks Runtime, and development builds, and can be disabled with `DATABRICKS_CLI_DISABLE_UPDATE_CHECK` ([#5470](https://github.com/databricks/cli/pull/5470)). +* `databricks labs list` now only shows projects that can be installed (those shipping a `labs.yml` manifest), and `databricks labs install` explains when a project does not provide one instead of failing with a generic "not found" error ([#5559](https://github.com/databricks/cli/pull/5559), [#5560](https://github.com/databricks/cli/pull/5560)). ### Bundles * Remove API enum values and types that are still in development from the `databricks-bundles` Python package; these were never accepted by the backend ([#5484](https://github.com/databricks/cli/pull/5484)). 
diff --git a/cmd/labs/list.go b/cmd/labs/list.go
index 171f956a1e7..1a80cb1867a 100644
--- a/cmd/labs/list.go
+++ b/cmd/labs/list.go
@@ -2,11 +2,20 @@ package labs
 
 import (
 	"context"
+	"errors"
+	"time"
 
 	"github.com/databricks/cli/cmd/labs/github"
+	"github.com/databricks/cli/cmd/labs/localcache"
 	"github.com/databricks/cli/cmd/labs/project"
 	"github.com/databricks/cli/libs/cmdio"
 	"github.com/spf13/cobra"
+	"golang.org/x/sync/errgroup"
+)
+
+const (
+	labsOrg             = "databrickslabs"
+	installableCacheTTL = 24 * time.Hour
 )
 
 type labsMeta struct {
@@ -20,14 +29,68 @@ func allRepos(ctx context.Context) (github.Repositories, error) {
 	if err != nil {
 		return nil, err
 	}
-	cache := github.NewRepositoryCache("databrickslabs", cacheDir)
+	cache := github.NewRepositoryCache(labsOrg, cacheDir)
 	return cache.Load(ctx)
 }
 
+// installableRepos lists the org repositories that `databricks labs install` can
+// handle, i.e. those shipping a labs.yml manifest; the rest (such as libraries
+// published to package indexes) would only fail to install. The list is served
+// through a local cache whose refresh callback filters the output of allRepos.
+func installableRepos(ctx context.Context) (github.Repositories, error) {
+	labsDir, err := project.PathInLabs(ctx)
+	if err != nil {
+		return nil, err
+	}
+	refresh := func() (github.Repositories, error) {
+		all, err := allRepos(ctx)
+		if err != nil {
+			return nil, err
+		}
+		return filterInstallable(ctx, all)
+	}
+	return localcache.NewLocalCache[github.Repositories](labsDir, labsOrg+"-installable-repositories", installableCacheTTL).Load(ctx, refresh)
+}
+
+// filterInstallable keeps repositories that have a root labs.yml manifest on their
+// default branch. The manifest is fetched from raw.githubusercontent.com, which is
+// not subject to the low unauthenticated GitHub API rate limit.
+func filterInstallable(ctx context.Context, repos github.Repositories) (github.Repositories, error) {
+	hasManifest := make([]bool, len(repos)) // one slot per input repo, in input order
+	group, groupCtx := errgroup.WithContext(ctx)
+	group.SetLimit(10) // at most 10 manifest lookups in flight
+	for idx, r := range repos {
+		if r.IsArchived || r.IsFork {
+			continue // archived repositories and forks are never listed
+		}
+		group.Go(func() error {
+			_, err := github.ReadFileFromRef(groupCtx, labsOrg, r.Name, r.DefaultBranch, "labs.yml")
+			switch {
+			case errors.Is(err, github.ErrNotFound): // no manifest: not installable
+				return nil
+			case err != nil: // any other failure aborts the whole listing
+				return err
+			}
+			hasManifest[idx] = true // each task writes only its own slot
+			return nil
+		})
+	}
+	if err := group.Wait(); err != nil {
+		return nil, err
+	}
+	var kept github.Repositories
+	for idx, r := range repos {
+		if hasManifest[idx] {
+			kept = append(kept, r)
+		}
+	}
+	return kept, nil
+}
+
 func newListCommand() *cobra.Command {
 	return &cobra.Command{
 		Use:   "list",
-		Short: "List all labs",
+		Short: "List labs that can be installed",
 		Annotations: map[string]string{
 			"template": cmdio.Heredoc(`
 			Name Description
@@ -37,18 +100,12 @@ func newListCommand() *cobra.Command {
 		},
 		RunE: func(cmd *cobra.Command, args []string) error {
 			ctx := cmd.Context()
-			repositories, err := allRepos(ctx)
+			repositories, err := installableRepos(ctx)
 			if err != nil {
 				return err
 			}
 			var info []labsMeta
 			for _, v := range repositories {
-				if v.IsArchived {
-					continue
-				}
-				if v.IsFork {
-					continue
-				}
 				description := v.Description
 				if len(description) > 50 {
 					description = description[:50] + "..."
diff --git a/cmd/labs/list_test.go b/cmd/labs/list_test.go
index d1f763a7f33..93120eef0f8 100644
--- a/cmd/labs/list_test.go
+++ b/cmd/labs/list_test.go
@@ -1,10 +1,14 @@
 package labs_test
 
 import (
+	"net/http"
+	"net/http/httptest"
 	"testing"
 
+	"github.com/databricks/cli/cmd/labs/github"
 	"github.com/databricks/cli/internal/testcli"
 	"github.com/databricks/cli/libs/env"
+	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
 
@@ -15,4 +19,43 @@ func TestListingWorks(t *testing.T) {
 	stdout, _, err := c.Run()
 	require.NoError(t, err)
 	require.Contains(t, stdout.String(), "ucx")
+	// blueprint is in the repositories cache fixture but not in the
+	// installable-repositories cache fixture, proving the latter is rendered.
+	require.NotContains(t, stdout.String(), "blueprint")
+}
+
+// TestListingFiltersReposWithoutLabsYml serves a fake repository list and fake
+// labs.yml lookups, and checks that a repository answering 404 is not listed.
+func TestListingFiltersReposWithoutLabsYml(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		switch r.URL.Path {
+		case "/users/databrickslabs/repos":
+			_, err := w.Write([]byte(`[
+				{"name": "ucx", "description": "Unity Catalog Migrations", "default_branch": "main"},
+				{"name": "brickster", "description": "R interface to Databricks", "default_branch": "main"}
+			]`))
+			assert.NoError(t, err)
+		case "/databrickslabs/ucx/main/labs.yml":
+			_, err := w.Write([]byte("name: ucx"))
+			assert.NoError(t, err)
+		case "/databrickslabs/brickster/main/labs.yml":
+			w.WriteHeader(http.StatusNotFound)
+		default:
+			// The handler runs on the server's goroutine, where t.FailNow must not
+			// be called; record the failure and fail the request instead.
+			t.Errorf("unexpected request: %s", r.URL.Path)
+			w.WriteHeader(http.StatusInternalServerError)
+		}
+	}))
+	defer server.Close()
+	ctx := t.Context()
+	ctx = github.WithApiOverride(ctx, server.URL)
+	ctx = github.WithUserContentOverride(ctx, server.URL)
+	ctx = env.WithUserHomeDir(ctx, t.TempDir())
+
+	c := testcli.NewRunner(t, ctx, "labs", "list")
+	stdout, _, err := c.Run()
+	require.NoError(t, err)
+	require.Contains(t, stdout.String(), "ucx")
+	require.NotContains(t, stdout.String(), "brickster")
 }
diff --git
a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-installable-repositories.json b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-installable-repositories.json new file mode 100644 index 00000000000..063ca11bd5d --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-installable-repositories.json @@ -0,0 +1,21 @@ +{ + "refreshed_at": "2033-01-01T00:00:00.92857+02:00", + "data": [ + { + "name": "ucx", + "description": "Unity Catalog Migrations", + "language": "Python", + "default_branch": "main", + "stargazers_count": 100500, + "fork": false, + "archived": false, + "topics": [], + "html_url": "https://github.com/databrickslabs/ucx", + "clone_url": "https://github.com/databrickslabs/ucx.git", + "ssh_url": "git@github.com:databrickslabs/ucx.git", + "license": { + "name": "Other" + } + } + ] +}