From d5f7567c38cd7baa38d5681cec13c81cd3bf2967 Mon Sep 17 00:00:00 2001 From: Jan Rose Date: Thu, 11 Jun 2026 20:23:21 +0200 Subject: [PATCH 1/2] Only show installable projects in 'databricks labs list' 'databricks labs list' showed every non-archived, non-fork repository in the databrickslabs GitHub org (currently 39), but only repositories that ship a labs.yml manifest at the root of their release tag can actually be installed (currently 8). Everything else failed 'databricks labs install' with a not-found error. Filter the listing to repositories that have a root labs.yml on their default branch, checked concurrently via raw.githubusercontent.com (not subject to the low unauthenticated GitHub API rate limit) and cached for 24 hours like the repository list itself. Co-authored-by: Isaac --- cmd/labs/list.go | 75 ++++++++++++++++--- cmd/labs/list_test.go | 39 ++++++++++ ...tabrickslabs-installable-repositories.json | 21 ++++++ 3 files changed, 126 insertions(+), 9 deletions(-) create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-installable-repositories.json diff --git a/cmd/labs/list.go b/cmd/labs/list.go index 171f956a1e7..1a80cb1867a 100644 --- a/cmd/labs/list.go +++ b/cmd/labs/list.go @@ -2,11 +2,20 @@ package labs import ( "context" + "errors" + "time" "github.com/databricks/cli/cmd/labs/github" + "github.com/databricks/cli/cmd/labs/localcache" "github.com/databricks/cli/cmd/labs/project" "github.com/databricks/cli/libs/cmdio" "github.com/spf13/cobra" + "golang.org/x/sync/errgroup" +) + +const ( + labsOrg = "databrickslabs" + installableCacheTTL = 24 * time.Hour ) type labsMeta struct { @@ -20,14 +29,68 @@ func allRepos(ctx context.Context) (github.Repositories, error) { if err != nil { return nil, err } - cache := github.NewRepositoryCache("databrickslabs", cacheDir) + cache := github.NewRepositoryCache(labsOrg, cacheDir) return cache.Load(ctx) } +// installableRepos returns the org repositories that `databricks labs 
install` can +// actually install. Most repositories in the org don't ship a labs.yml manifest +// (e.g. libraries published to package indexes), so listing them would only +// advertise projects that fail to install. +func installableRepos(ctx context.Context) (github.Repositories, error) { + cacheDir, err := project.PathInLabs(ctx) + if err != nil { + return nil, err + } + cache := localcache.NewLocalCache[github.Repositories](cacheDir, labsOrg+"-installable-repositories", installableCacheTTL) + return cache.Load(ctx, func() (github.Repositories, error) { + repos, err := allRepos(ctx) + if err != nil { + return nil, err + } + return filterInstallable(ctx, repos) + }) +} + +// filterInstallable keeps repositories that have a root labs.yml manifest on their +// default branch. The manifest is fetched from raw.githubusercontent.com, which is +// not subject to the low unauthenticated GitHub API rate limit. +func filterInstallable(ctx context.Context, repos github.Repositories) (github.Repositories, error) { + installable := make([]bool, len(repos)) + g, gctx := errgroup.WithContext(ctx) + g.SetLimit(10) + for i, repo := range repos { + if repo.IsArchived || repo.IsFork { + continue + } + g.Go(func() error { + _, err := github.ReadFileFromRef(gctx, labsOrg, repo.Name, repo.DefaultBranch, "labs.yml") + if errors.Is(err, github.ErrNotFound) { + return nil + } + if err != nil { + return err + } + installable[i] = true + return nil + }) + } + if err := g.Wait(); err != nil { + return nil, err + } + var out github.Repositories + for i, repo := range repos { + if installable[i] { + out = append(out, repo) + } + } + return out, nil +} + func newListCommand() *cobra.Command { return &cobra.Command{ Use: "list", - Short: "List all labs", + Short: "List labs that can be installed", Annotations: map[string]string{ "template": cmdio.Heredoc(` Name Description @@ -37,18 +100,12 @@ func newListCommand() *cobra.Command { }, RunE: func(cmd *cobra.Command, args []string) error { ctx 
:= cmd.Context() - repositories, err := allRepos(ctx) + repositories, err := installableRepos(ctx) if err != nil { return err } var info []labsMeta for _, v := range repositories { - if v.IsArchived { - continue - } - if v.IsFork { - continue - } description := v.Description if len(description) > 50 { description = description[:50] + "..." diff --git a/cmd/labs/list_test.go b/cmd/labs/list_test.go index d1f763a7f33..93120eef0f8 100644 --- a/cmd/labs/list_test.go +++ b/cmd/labs/list_test.go @@ -1,10 +1,14 @@ package labs_test import ( + "net/http" + "net/http/httptest" "testing" + "github.com/databricks/cli/cmd/labs/github" "github.com/databricks/cli/internal/testcli" "github.com/databricks/cli/libs/env" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -15,4 +19,39 @@ func TestListingWorks(t *testing.T) { stdout, _, err := c.Run() require.NoError(t, err) require.Contains(t, stdout.String(), "ucx") + // blueprint is in the repositories cache fixture but not in the + // installable-repositories cache fixture, proving the latter is rendered. 
+ require.NotContains(t, stdout.String(), "blueprint") +} + +func TestListingFiltersReposWithoutLabsYml(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/users/databrickslabs/repos": + _, err := w.Write([]byte(`[ + {"name": "ucx", "description": "Unity Catalog Migrations", "default_branch": "main"}, + {"name": "brickster", "description": "R interface to Databricks", "default_branch": "main"} + ]`)) + assert.NoError(t, err) + case "/databrickslabs/ucx/main/labs.yml": + _, err := w.Write([]byte("name: ucx")) + assert.NoError(t, err) + case "/databrickslabs/brickster/main/labs.yml": + w.WriteHeader(http.StatusNotFound) + default: + t.Errorf("unexpected request: %s", r.URL.Path) // Errorf, not FailNow: the handler runs outside the test goroutine + http.Error(w, "unexpected request", http.StatusInternalServerError) + } + })) + defer server.Close() + ctx := t.Context() + ctx = github.WithApiOverride(ctx, server.URL) + ctx = github.WithUserContentOverride(ctx, server.URL) + ctx = env.WithUserHomeDir(ctx, t.TempDir()) + + c := testcli.NewRunner(t, ctx, "labs", "list") + stdout, _, err := c.Run() + require.NoError(t, err) + require.Contains(t, stdout.String(), "ucx") + require.NotContains(t, stdout.String(), "brickster") } diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-installable-repositories.json b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-installable-repositories.json new file mode 100644 index 00000000000..063ca11bd5d --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-installable-repositories.json @@ -0,0 +1,21 @@ +{ + "refreshed_at": "2033-01-01T00:00:00.92857+02:00", + "data": [ + { + "name": "ucx", + "description": "Unity Catalog Migrations", + "language": "Python", + "default_branch": "main", + "stargazers_count": 100500, + "fork": false, + "archived": false, + "topics": [], + "html_url": "https://github.com/databrickslabs/ucx", + "clone_url": "https://github.com/databrickslabs/ucx.git", +
"ssh_url": "git@github.com:databrickslabs/ucx.git", + "license": { + "name": "Other" + } + } + ] +} From 8ed1722aa45642d569e8ae587daab2b871aa604e Mon Sep 17 00:00:00 2001 From: Jan Rose Date: Thu, 11 Jun 2026 20:42:43 +0200 Subject: [PATCH 2/2] Add changelog entry Co-authored-by: Isaac --- NEXT_CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index fe3511e4b21..6c9187806c6 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -6,6 +6,7 @@ ### CLI * Show a once-per-day notice after a command when a newer CLI release is available, with a link to the release and the upgrade command for the detected install method. Suppressed for non-interactive/CI runs, JSON output, the Databricks Runtime, and development builds, and can be disabled with `DATABRICKS_CLI_DISABLE_UPDATE_CHECK` ([#5470](https://github.com/databricks/cli/pull/5470)). +* `databricks labs list` now only shows projects that can be installed (those shipping a `labs.yml` manifest), and `databricks labs install` explains when a project does not provide one instead of failing with a generic "not found" error ([#5559](https://github.com/databricks/cli/pull/5559), [#5560](https://github.com/databricks/cli/pull/5560)). ### Bundles * Remove API enum values and types that are still in development from the `databricks-bundles` Python package; these were never accepted by the backend ([#5484](https://github.com/databricks/cli/pull/5484)).