diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0c084461c..a7c6f6607 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -48,10 +48,7 @@ jobs: steps: - name: add dependencies run: apk add bash go - # Pinned a commit to make go version configurable. - # This should be safe to upgrade once this commit is in a released version: - # https://github.com/jidicula/go-fuzz-action/commit/23cc553941669144159507e2cccdbb4afc5b3076 - - uses: jidicula/go-fuzz-action@0206b61afc603b665297621fa5e691b1447a5e57 + - uses: jidicula/go-fuzz-action@2d8b802597c47a79764d83dabc27fb672f2fb8d9 with: packages: 'github.com/sourcegraph/zoekt' # This is the package where the Protobuf round trip tests are defined fuzz-time: 30s @@ -116,73 +113,3 @@ jobs: # Check if the generated code is up-to-date - run: .github/workflows/buf-generate-check.sh - # We build a shared docker image called "zoekt". This is not pushed, but is - # used for creating the indexserver and webserver images. - docker: - if: github.ref == 'refs/heads/main' - runs-on: ubuntu-latest - needs: - - "test" - - "shellcheck" - steps: - - name: checkout - uses: actions/checkout@v3 - - - name: version - id: version - run: .github/workflows/docker-version.sh - - - name: docker-meta-webserver - id: meta-webserver - uses: docker/metadata-action@v3 - with: - images: | - sourcegraph/zoekt-webserver - tags: | - type=ref,event=branch - type=ref,event=pr - type=semver,pattern={{version}} - type=sha - - name: docker-meta-indexserver - id: meta-indexserver - uses: docker/metadata-action@v3 - with: - images: | - sourcegraph/zoekt-indexserver - tags: | - type=ref,event=branch - type=ref,event=pr - type=semver,pattern={{version}} - type=sha - - - name: build-zoekt - uses: docker/build-push-action@v4 - with: - context: . 
- tags: "zoekt:latest" - push: "false" - build-args: VERSION=${{ steps.version.outputs.value }} - - - name: Login to Docker Hub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: build-push-webserver - uses: docker/build-push-action@v4 - with: - context: . - tags: sourcegraph/zoekt-webserver:${{ steps.version.outputs.value }}, ${{ steps.meta-webserver.outputs.tags }}, sourcegraph/zoekt-webserver:latest - file: Dockerfile.webserver - cache-from: sourcegraph/zoekt-webserver:latest - push: true - - - name: build-push-indexserver - uses: docker/build-push-action@v4 - with: - context: . - tags: sourcegraph/zoekt-indexserver:${{ steps.version.outputs.value }}, ${{ steps.meta-indexserver.outputs.tags }}, sourcegraph/zoekt-indexserver:latest - file: Dockerfile.indexserver - cache-from: sourcegraph/zoekt-indexserver:latest - push: true diff --git a/.github/workflows/docker-version.sh b/.github/workflows/docker-version.sh index 49638d7d0..ffc5ba3e8 100755 --- a/.github/workflows/docker-version.sh +++ b/.github/workflows/docker-version.sh @@ -1,13 +1,13 @@ #!/usr/bin/env bash -# This is the psuedo-version that go.mod uses. We use the same version string -# so that sourcegraph/sourcegraph's go.mod is kept in sync with the version we -# publish. +set -euo pipefail -printf "::set-output name=value::" - -TZ=UTC git --no-pager show \ +# This is the pseudo-version that go.mod uses. We use the same version string +# so that downstream consumers can line up image versions with module versions. 
+version="$(TZ=UTC git --no-pager show \ --quiet \ --abbrev=12 \ --date='format-local:%Y%m%d%H%M%S' \ - --format="0.0.0-%cd-%h" + --format='0.0.0-%cd-%h')" + +printf 'value=%s\n' "$version" >>"$GITHUB_OUTPUT" diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 000000000..79967e9c0 --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,59 @@ +name: Publish Docker Image + +on: + push: + branches: + - main + tags: + - 'v*' + workflow_dispatch: + +jobs: + docker: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - name: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: version + id: version + run: .github/workflows/docker-version.sh + + - name: setup-buildx + uses: docker/setup-buildx-action@v3 + + - name: docker-meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ghcr.io/${{ github.repository }} + tags: | + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=raw,value=${{ steps.version.outputs.value }},enable={{is_default_branch}} + type=raw,value=latest,enable={{is_default_branch}} + type=sha,prefix=sha-,format=short + + - name: login to ghcr.io + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: build and push + uses: docker/build-push-action@v6 + with: + context: . + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + build-args: | + VERSION=${{ steps.version.outputs.value }} diff --git a/AGENT.md b/AGENT.md new file mode 100644 index 000000000..053fb1fd2 --- /dev/null +++ b/AGENT.md @@ -0,0 +1,26 @@ +# Zoekt Coding Agent Guidelines + +## Build & Test Commands +- Build: `go build ./cmd/...` +- Run all tests: `go test ./... 
-short` +- Run a single test: `go test -run=TestName ./path/to/package` +- Run specific test with verbose output: `go test -v -run=TestName ./path/to/package` +- Benchmark: `go test -bench=BenchmarkName ./path/to/package` +- Fuzzing: `go test -fuzz=FuzzTestName -fuzztime=30s ./package` +- Smoke test: Check a specific repo: `go run ./cmd/zoekt-git-index /path/to/repo` + +## Code Style Guidelines +- Import format: standard Go imports (stdlib, external, internal) with alphabetical sorting +- Error handling: explicit error checking with proper returns (no ignored errors) +- Naming: Go standard (CamelCase for exported, camelCase for private) +- Tests: Table-driven tests preferred with descriptive names +- Documentation: All exported functions should have comments +- Shell scripts: Use shfmt with `-i 2 -ci -bn` flags +- Proto files: Run buf lint and format checks +- Memory optimization: As a code search database, Zoekt is memory-sensitive - be conscious of struct field ordering and memory usage in core structures + +## Documentation Resources +- Design overview: `doc/design.md` - Core architecture and search methodology +- Indexing details: `doc/indexing.md` - How the indexing process works +- Query syntax: `doc/query_syntax.md` - Search query language reference +- FAQ: `doc/faq.md` - Common questions and troubleshooting \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..a1cab4a92 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,17 @@ +# Contributing + +We welcome contributions to the project! To propose a change, please fork the repository, make your changes, then submit +a pull request. If the change is significant or potentially controversial, please open an issue first to discuss it. +Zoekt does not require a CLA to contribute. + +Before opening a pull request, make sure that you have run the tests locally: +```sh +go test ./... +``` + +It's also good to run a local smoke test for the relevant component. 
For example, if you've made changes in +`zoekt-git-index`, you can try indexing a repository locally: +```sh +go run ./cmd/zoekt-git-index /path/to/repo +``` + diff --git a/Dockerfile b/Dockerfile index f7a43b405..1cdf93b76 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,25 +1,47 @@ -FROM golang:1.23.4-alpine3.19 AS builder +# syntax=docker/dockerfile:1.7 +FROM golang:1.26.2-alpine AS builder RUN apk add --no-cache ca-certificates ENV CGO_ENABLED=0 -WORKDIR /go/src/github.com/sourcegraph/zoekt +WORKDIR /src -# Cache dependencies +# Cache dependency resolution separately from source changes. COPY go.mod go.sum ./ -RUN go mod download +RUN --mount=type=cache,target=/go/pkg/mod \ + go mod download + +COPY . . +ARG VERSION=dev +RUN --mount=type=cache,target=/go/pkg/mod \ + --mount=type=cache,target=/root/.cache/go-build \ + mkdir -p /out && \ + go build \ + -trimpath \ + -ldflags "-X github.com/sourcegraph/zoekt.Version=$VERSION" \ + -o /out/ \ + ./cmd/... + +FROM alpine:3 -COPY . ./ -ARG VERSION -RUN go install -ldflags "-X github.com/sourcegraph/zoekt.Version=$VERSION" ./cmd/... +RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget -FROM alpine:3.19 AS zoekt +COPY --chmod=755 install-ctags-alpine.sh /usr/local/bin/install-ctags-alpine.sh +RUN /usr/local/bin/install-ctags-alpine.sh && \ + rm /usr/local/bin/install-ctags-alpine.sh \ + /usr/local/bin/universal-optscript -RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget +RUN addgroup -S zoekt && \ + adduser -S -G zoekt -h /home/zoekt zoekt && \ + mkdir -p /data/index /home/zoekt && \ + chown -R zoekt:zoekt /data /home/zoekt + +COPY --from=builder /out/ /usr/local/bin/ -COPY install-ctags-alpine.sh . 
-RUN ./install-ctags-alpine.sh && rm install-ctags-alpine.sh +USER zoekt +WORKDIR /home/zoekt -COPY --from=builder /go/bin/* /usr/local/bin/ +ENV DATA_DIR=/data/index ENTRYPOINT ["/sbin/tini", "--"] +CMD ["zoekt-webserver", "-index", "/data/index", "-pprof", "-rpc"] diff --git a/Dockerfile.indexserver b/Dockerfile.indexserver deleted file mode 100644 index d1ddb90d0..000000000 --- a/Dockerfile.indexserver +++ /dev/null @@ -1,23 +0,0 @@ -FROM alpine:3.19 - -RUN apk add --no-cache ca-certificates bind-tools tini git jansson - -# Run as non-root user sourcegraph. External volumes should be mounted under /data (which will be owned by sourcegraph). -RUN mkdir -p /home/sourcegraph -RUN addgroup -S sourcegraph && adduser -S -G sourcegraph -h /home/sourcegraph sourcegraph && mkdir -p /data && chown -R sourcegraph:sourcegraph /data -USER sourcegraph -WORKDIR /home/sourcegraph - -ENV SRC_FRONTEND_INTERNAL http://sourcegraph-frontend-internal -ENV DATA_DIR /data/index -RUN mkdir -p ${DATA_DIR} - -COPY --from=zoekt \ - /usr/local/bin/universal-* \ - /usr/local/bin/zoekt-sourcegraph-indexserver \ - /usr/local/bin/zoekt-archive-index \ - /usr/local/bin/zoekt-git-index \ - /usr/local/bin/zoekt-merge-index \ - /usr/local/bin/ - -ENTRYPOINT ["/sbin/tini", "--", "zoekt-sourcegraph-indexserver"] diff --git a/Dockerfile.webserver b/Dockerfile.webserver deleted file mode 100644 index cccdfa83d..000000000 --- a/Dockerfile.webserver +++ /dev/null @@ -1,24 +0,0 @@ -FROM alpine:3.19 - -RUN apk add --no-cache ca-certificates bind-tools tini - -# Run as non-root user sourcegraph. External volumes should be mounted under /data (which will be owned by sourcegraph). 
-RUN mkdir -p /home/sourcegraph -RUN addgroup -S sourcegraph && adduser -S -G sourcegraph -h /home/sourcegraph sourcegraph && mkdir -p /data && chown -R sourcegraph:sourcegraph /data -USER sourcegraph -WORKDIR /home/sourcegraph - -ENV DATA_DIR /data/index -RUN mkdir -p ${DATA_DIR} - -# We copy from the locally built zoekt image -COPY --from=zoekt /usr/local/bin/zoekt-webserver /usr/local/bin/ - -# zoekt-webserver has a large stable heap size (10s of gigs), and as such the -# default GOGC=100 could be better tuned. https://dave.cheney.net/tag/gogc -# In go1.18 the GC changed significantly and from experimentation we tuned it -# down from 50 to 25. -ENV GOGC=25 - -ENTRYPOINT ["/sbin/tini", "--"] -CMD zoekt-webserver -index $DATA_DIR -pprof -rpc -indexserver_proxy diff --git a/README.md b/README.md index 1a09a37ca..5f060f2d3 100644 --- a/README.md +++ b/README.md @@ -1,150 +1,117 @@ +# Zoekt: fast code search "Zoekt, en gij zult spinazie eten" - Jan Eertink ("seek, and ye shall eat spinach" - My primary school teacher) -This is a fast text search engine, intended for use with source +Zoekt is a text search engine intended for use with source code. (Pronunciation: roughly as you would pronounce "zooked" in English) -**Note:** This is a [Sourcegraph](https://github.com/sourcegraph/zoekt) fork -of [github.com/google/zoekt](https://github.com/google/zoekt). It is now the -main maintained source of Zoekt. +**Note:** This has been the maintained source for Zoekt since 2017, when it was forked from the +original repository [github.com/google/zoekt](https://github.com/google/zoekt). -# INSTRUCTIONS +## Background -## Downloading +Zoekt supports fast substring and regexp matching on source code, with a rich query language +that includes boolean operators (and, or, not). It can search individual repositories, and search +across many repositories in a large codebase. Zoekt ranks search results using a combination of code-related signals +like whether the match is on a symbol. 
Because of its general design based on trigram indexing and syntactic +parsing, it works well for a variety of programming languages. - go get github.com/sourcegraph/zoekt/ - -## Indexing - -### Directory +The two main ways to use the project are +* Through individual commands, to index repositories and perform searches through Zoekt's [query language](doc/query_syntax.md) +* Or, through the indexserver and webserver, which support syncing repositories from a code host and searching them through a web UI or API - go install github.com/sourcegraph/zoekt/cmd/zoekt-index - $GOPATH/bin/zoekt-index . - -### Git repository +For more details on Zoekt's design, see the [docs directory](doc/). - go install github.com/sourcegraph/zoekt/cmd/zoekt-git-index - $GOPATH/bin/zoekt-git-index -branches master,stable-1.4 -prefix origin/ . +## Usage -### Repo repositories +### Installation - go install github.com/sourcegraph/zoekt/cmd/zoekt-{repo-index,mirror-gitiles} - zoekt-mirror-gitiles -dest ~/repos/ https://gfiber.googlesource.com - zoekt-repo-index \ - -name gfiber \ - -base_url https://gfiber.googlesource.com/ \ - -manifest_repo ~/repos/gfiber.googlesource.com/manifests.git \ - -repo_cache ~/repos \ - -manifest_rev_prefix=refs/heads/ --rev_prefix= \ - master:default_unrestricted.xml + go get github.com/sourcegraph/zoekt/ -## Searching +**Note**: It is also recommended to install [Universal ctags](https://github.com/universal-ctags/ctags), as symbol +information is a key signal in ranking search results. See [ctags.md](doc/ctags.md) for more information. -### Web interface +### Command-based usage - go install github.com/sourcegraph/zoekt/cmd/zoekt-webserver - $GOPATH/bin/zoekt-webserver -listen :6070 +Zoekt supports indexing and searching repositories on the command line. This is most helpful +for simple local usage, or for testing and development. 
-### JSON API +#### Indexing a local git repo -You can retrieve search results as JSON by sending a GET request to zoekt-webserver. + go install github.com/sourcegraph/zoekt/cmd/zoekt-git-index + $GOPATH/bin/zoekt-git-index -index ~/.zoekt /path/to/repo - curl --get \ - --url "http://localhost:6070/search" \ - --data-urlencode "q=ngram f:READ" \ - --data-urlencode "num=50" \ - --data-urlencode "format=json" +#### Indexing a local directory (not git-specific) -The response data is a JSON object. You can refer to [web.ApiSearchResult](https://sourcegraph.com/github.com/sourcegraph/zoekt@6b1df4f8a3d7b34f13ba0cafd8e1a9b3fc728cf0/-/blob/web/api.go?L23:6&subtree=true) to learn about the structure of the object. + go install github.com/sourcegraph/zoekt/cmd/zoekt-index + $GOPATH/bin/zoekt-index -index ~/.zoekt /path/to/repo -### CLI +#### Searching an index go install github.com/sourcegraph/zoekt/cmd/zoekt - $GOPATH/bin/zoekt 'ngram f:READ' + $GOPATH/bin/zoekt 'hello' + $GOPATH/bin/zoekt 'hello file:README' -## Installation -A more organized installation on a Linux server should use a systemd unit file, -eg. +### Zoekt services - [Unit] - Description=zoekt webserver +Zoekt also contains an index server and web server to support larger-scale indexing and searching +of remote repositories. The index server can be configured to periodically fetch and reindex repositories +from a code host. The webserver can be configured to serve search results through a web UI or API. 
- [Service] - ExecStart=/zoekt/bin/zoekt-webserver -index /zoekt/index -listen :443 --ssl_cert /zoekt/etc/cert.pem --ssl_key /zoekt/etc/key.pem - Restart=always +#### Indexing a GitHub organization + + go install github.com/sourcegraph/zoekt/cmd/zoekt-indexserver - [Install] - WantedBy=default.target + echo YOUR_GITHUB_TOKEN_HERE > token.txt + echo '[{"GitHubOrg": "apache", "CredentialPath": "token.txt"}]' > config.json + $GOPATH/bin/zoekt-indexserver -mirror_config config.json -data_dir ~/.zoekt/ -# SEARCH SERVICE +This will fetch all repos under 'github.com/apache', then index the repositories. The indexserver takes care of +periodically fetching and indexing new data, and cleaning up logfiles. See [config.go](cmd/zoekt-indexserver/config.go) +for more details on this configuration. -Zoekt comes with a small service management program: +#### Starting the web server - go install github.com/sourcegraph/zoekt/cmd/zoekt-indexserver + go install github.com/sourcegraph/zoekt/cmd/zoekt-webserver + $GOPATH/bin/zoekt-webserver -index ~/.zoekt/ - cat << EOF > config.json - [{"GithubUser": "username"}, - {"GithubOrg": "org"}, - {"GitilesURL": "https://gerrit.googlesource.com", "Name": "zoekt" } - ] - EOF +This will start a web server with a simple search UI at http://localhost:6070. +See the [query syntax docs](doc/query_syntax.md) for more details on the query +language. - $GOPATH/bin/zoekt-indexserver -mirror_config config.json +#### Container image -This will mirror all repos under 'github.com/username', 'github.com/org', as -well as the 'zoekt' repository. It will index the repositories. +Zoekt publishes a single container image at `ghcr.io/sourcegraph/zoekt`. It +includes the Zoekt binaries, `git`, and `universal-ctags`. By default it runs +`zoekt-webserver` against `/data/index`: -It takes care of fetching and indexing new data and cleaning up logfiles. 
+ docker run --rm -p 6070:6070 -v "$PWD/index:/data/index" ghcr.io/sourcegraph/zoekt -The webserver can be started from a standard service management framework, such -as systemd. +You can override the default command to run `zoekt-indexserver` instead. This +example stores cloned repositories, logs, and indexes under `/data` and reads a +mounted mirror config file: + docker run --rm \ + -v "$PWD/config.json:/config.json:ro" \ + -v "$PWD/token.txt:/home/zoekt/token.txt:ro" \ + -v "$PWD/zoekt-data:/data" \ + ghcr.io/sourcegraph/zoekt \ + zoekt-indexserver -mirror_config /config.json -data_dir /data -# SYMBOL SEARCH +If you start the web server with `-rpc`, it exposes a [simple JSON search +API](doc/json-api.md) at `http://localhost:6070/api/search`. -It is recommended to install [Universal -ctags](https://github.com/universal-ctags/ctags) to improve -ranking. See [here](doc/ctags.md) for more information. +The JSON API supports advanced features including: +- Streaming search results (using the `FlushWallTime` option) +- Alternative BM25 scoring (using the `UseBM25Scoring` option) +- Context lines around matches (using the `NumContextLines` option) +Finally, the web server exposes a gRPC API that supports [structured query objects](query/query.go) and advanced search options. -# ACKNOWLEDGEMENTS +## Acknowledgements Thanks to Han-Wen Nienhuys for creating Zoekt. Thanks to Alexander Neubeck for coming up with this idea, and helping Han-Wen Nienhuys flesh it out. - - -# FORK DETAILS - -Originally this fork contained some changes that do not make sense to upstream -and or have not yet been upstreamed. However, this is now the defacto source -for Zoekt. This section will remain for historical reasons and contains -outdated information. It can be removed once the dust settles on moving from -google/zoekt to sourcegraph/zoekt. 
Differences: - -- [zoekt-sourcegraph-indexserver](cmd/zoekt-sourcegraph-indexserver/main.go) - is a Sourcegraph specific command which indexes all enabled repositories on - Sourcegraph, as well as keeping the indexes up to date. -- We have exposed the API via - [keegancsmith/rpc](https://github.com/keegancsmith/rpc) (a fork of `net/rpc` - which supports cancellation). -- Query primitive `BranchesRepos` to efficiently specify a set of repositories to - search. -- Allow empty shard directories on startup. Needed when starting a fresh - instance which hasn't indexed anything yet. -- We can return symbol/ctag data in results. Additionally we can run symbol regex queries. -- We search shards in order of repo name and ignore shard ranking. -- Other minor changes. - -Assuming you have the gerrit upstream configured, a useful way to see what we -changed is: - -``` shellsession -$ git diff gerrit/master -- ':(exclude)vendor/' ':(exclude)Gopkg*' -``` - -# DISCLAIMER - -This is not an official Google product diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000..b8e00ada3 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,3 @@ +# Security Policy + +Our security policy is documented at https://sourcegraph.com/security. diff --git a/api.go b/api.go index 70e7774bd..0ed06decf 100644 --- a/api.go +++ b/api.go @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt // import "github.com/sourcegraph/zoekt" +package zoekt import ( "context" "encoding/json" "errors" "fmt" + "math" "reflect" "strconv" "strings" @@ -32,7 +33,6 @@ const ( sliceHeaderBytes uint64 = 24 stringHeaderBytes uint64 = 16 pointerSize uint64 = 8 - interfaceBytes uint64 = 16 ) // FileMatch contains all the matches within a file. @@ -135,6 +135,22 @@ func (m *FileMatch) sizeBytes() (sz uint64) { return } +// AddScore increments the score of the FileMatch by the computed score. 
If +// debugScore is true, it also adds a debug string to the FileMatch. If raw is +// -1, it is ignored. Otherwise, it is added to the debug string. +func (m *FileMatch) AddScore(what string, computed float64, raw float64, debugScore bool) { + if computed != 0 && debugScore { + var b strings.Builder + fmt.Fprintf(&b, "%s", what) + if raw != -1 { + fmt.Fprintf(&b, "(%s)", strconv.FormatFloat(raw, 'f', -1, 64)) + } + fmt.Fprintf(&b, ":%.2f, ", computed) + m.Debug += b.String() + } + m.Score += computed +} + // ChunkMatch is a set of non-overlapping matches within a contiguous range of // lines in the file. type ChunkMatch struct { @@ -571,6 +587,9 @@ type Repository struct { // The repository URL. URL string + // Additional metadata about the repository. + Metadata map[string]string + // The physical source where this repo came from, eg. full // path to the zip filename or git repository directory. This // will not be exposed in the UI, but can be used to detect @@ -631,8 +650,8 @@ type Repository struct { func (r *Repository) UnmarshalJSON(data []byte) error { // We define a new type so that we can use json.Unmarshal // without recursing into this same method. - type repository *Repository - repo := repository(r) + type repository Repository + repo := (*repository)(r) err := json.Unmarshal(data, repo) if err != nil { @@ -671,13 +690,17 @@ func (r *Repository) UnmarshalJSON(data []byte) error { // Normalize the repo score within [0, maxUint16), with the midpoint at 5,000. // This means popular repos (roughly ones with over 5,000 stars) see diminishing // returns from more stars. - r.Rank = uint16(r.priority / (5000.0 + r.priority) * maxUInt16) + r.Rank = uint16(r.priority / (5000.0 + r.priority) * math.MaxUint16) } } return nil } +func (r *Repository) GetPriority() float64 { + return r.priority +} + // monthsSince1970 returns the number of months since 1970. It returns values in // the range [0, maxUInt16]. 
The upper bound is reached in the year 7431, the // lower bound for all dates before 1970. @@ -687,7 +710,7 @@ func monthsSince1970(t time.Time) uint16 { return 0 } months := int(t.Year()-1970)*12 + int(t.Month()-1) - return uint16(min(months, maxUInt16)) + return uint16(min(months, math.MaxUint16)) } // MergeMutable will merge x into r. mutated will be true if it made any @@ -976,7 +999,8 @@ type SearchOptions struct { // EXPERIMENTAL. If true, use text-search style scoring instead of the default // scoring formula. The scoring algorithm treats each match in a file as a term - // and computes an approximation to BM25. + // and computes an approximation to BM25. When enabled, BM25 scoring is used for + // the overall FileMatch score, as well as individual LineMatch and ChunkMatch scores. // // The calculation of IDF assumes that Zoekt visits all documents containing any // of the query terms during evaluation. This is true, for example, if all query @@ -996,6 +1020,17 @@ type SearchOptions struct { SpanContext map[string]string } +func (o *SearchOptions) SetDefaults() { + if o.ShardMaxMatchCount == 0 { + // We cap the total number of matches, so overly broad + // searches don't crash the machine. + o.ShardMaxMatchCount = 100000 + } + if o.TotalMaxMatchCount == 0 { + o.TotalMaxMatchCount = 10 * o.ShardMaxMatchCount + } +} + // String returns a succinct representation of the options. This is meant for // human consumption in logs and traces. 
// diff --git a/api_proto.go b/api_proto.go index 4ca3e0758..3a6c6a947 100644 --- a/api_proto.go +++ b/api_proto.go @@ -21,10 +21,10 @@ import ( "google.golang.org/protobuf/types/known/durationpb" "google.golang.org/protobuf/types/known/timestamppb" - proto "github.com/sourcegraph/zoekt/grpc/protos/zoekt/webserver/v1" + webserverv1 "github.com/sourcegraph/zoekt/grpc/protos/zoekt/webserver/v1" ) -func FileMatchFromProto(p *proto.FileMatch) FileMatch { +func FileMatchFromProto(p *webserverv1.FileMatch) FileMatch { lineMatches := make([]LineMatch, len(p.GetLineMatches())) for i, lineMatch := range p.GetLineMatches() { lineMatches[i] = LineMatchFromProto(lineMatch) @@ -54,18 +54,18 @@ func FileMatchFromProto(p *proto.FileMatch) FileMatch { } } -func (m *FileMatch) ToProto() *proto.FileMatch { - lineMatches := make([]*proto.LineMatch, len(m.LineMatches)) +func (m *FileMatch) ToProto() *webserverv1.FileMatch { + lineMatches := make([]*webserverv1.LineMatch, len(m.LineMatches)) for i, lm := range m.LineMatches { lineMatches[i] = lm.ToProto() } - chunkMatches := make([]*proto.ChunkMatch, len(m.ChunkMatches)) + chunkMatches := make([]*webserverv1.ChunkMatch, len(m.ChunkMatches)) for i, cm := range m.ChunkMatches { chunkMatches[i] = cm.ToProto() } - return &proto.FileMatch{ + return &webserverv1.FileMatch{ Score: m.Score, Debug: m.Debug, FileName: []byte(m.FileName), @@ -84,7 +84,7 @@ func (m *FileMatch) ToProto() *proto.FileMatch { } } -func ChunkMatchFromProto(p *proto.ChunkMatch) ChunkMatch { +func ChunkMatchFromProto(p *webserverv1.ChunkMatch) ChunkMatch { ranges := make([]Range, len(p.GetRanges())) for i, r := range p.GetRanges() { ranges[i] = RangeFromProto(r) @@ -107,18 +107,18 @@ func ChunkMatchFromProto(p *proto.ChunkMatch) ChunkMatch { } } -func (cm *ChunkMatch) ToProto() *proto.ChunkMatch { - ranges := make([]*proto.Range, len(cm.Ranges)) +func (cm *ChunkMatch) ToProto() *webserverv1.ChunkMatch { + ranges := make([]*webserverv1.Range, len(cm.Ranges)) for i, r := 
range cm.Ranges { ranges[i] = r.ToProto() } - symbolInfo := make([]*proto.SymbolInfo, len(cm.SymbolInfo)) + symbolInfo := make([]*webserverv1.SymbolInfo, len(cm.SymbolInfo)) for i, si := range cm.SymbolInfo { symbolInfo[i] = si.ToProto() } - return &proto.ChunkMatch{ + return &webserverv1.ChunkMatch{ Content: cm.Content, ContentStart: cm.ContentStart.ToProto(), FileName: cm.FileName, @@ -130,21 +130,21 @@ func (cm *ChunkMatch) ToProto() *proto.ChunkMatch { } } -func RangeFromProto(p *proto.Range) Range { +func RangeFromProto(p *webserverv1.Range) Range { return Range{ Start: LocationFromProto(p.GetStart()), End: LocationFromProto(p.GetEnd()), } } -func (r *Range) ToProto() *proto.Range { - return &proto.Range{ +func (r *Range) ToProto() *webserverv1.Range { + return &webserverv1.Range{ Start: r.Start.ToProto(), End: r.End.ToProto(), } } -func LocationFromProto(p *proto.Location) Location { +func LocationFromProto(p *webserverv1.Location) Location { return Location{ ByteOffset: p.GetByteOffset(), LineNumber: p.GetLineNumber(), @@ -152,15 +152,15 @@ func LocationFromProto(p *proto.Location) Location { } } -func (l *Location) ToProto() *proto.Location { - return &proto.Location{ +func (l *Location) ToProto() *webserverv1.Location { + return &webserverv1.Location{ ByteOffset: l.ByteOffset, LineNumber: l.LineNumber, Column: l.Column, } } -func LineMatchFromProto(p *proto.LineMatch) LineMatch { +func LineMatchFromProto(p *webserverv1.LineMatch) LineMatch { lineFragments := make([]LineFragmentMatch, len(p.GetLineFragments())) for i, lineFragment := range p.GetLineFragments() { lineFragments[i] = LineFragmentMatchFromProto(lineFragment) @@ -180,13 +180,13 @@ func LineMatchFromProto(p *proto.LineMatch) LineMatch { } } -func (lm *LineMatch) ToProto() *proto.LineMatch { - fragments := make([]*proto.LineFragmentMatch, len(lm.LineFragments)) +func (lm *LineMatch) ToProto() *webserverv1.LineMatch { + fragments := make([]*webserverv1.LineFragmentMatch, len(lm.LineFragments)) for 
i, fragment := range lm.LineFragments { fragments[i] = fragment.ToProto() } - return &proto.LineMatch{ + return &webserverv1.LineMatch{ Line: lm.Line, LineStart: int64(lm.LineStart), LineEnd: int64(lm.LineEnd), @@ -200,7 +200,7 @@ func (lm *LineMatch) ToProto() *proto.LineMatch { } } -func SymbolFromProto(p *proto.SymbolInfo) *Symbol { +func SymbolFromProto(p *webserverv1.SymbolInfo) *Symbol { if p == nil { return nil } @@ -213,12 +213,12 @@ func SymbolFromProto(p *proto.SymbolInfo) *Symbol { } } -func (s *Symbol) ToProto() *proto.SymbolInfo { +func (s *Symbol) ToProto() *webserverv1.SymbolInfo { if s == nil { return nil } - return &proto.SymbolInfo{ + return &webserverv1.SymbolInfo{ Sym: s.Sym, Kind: s.Kind, Parent: s.Parent, @@ -226,7 +226,7 @@ func (s *Symbol) ToProto() *proto.SymbolInfo { } } -func LineFragmentMatchFromProto(p *proto.LineFragmentMatch) LineFragmentMatch { +func LineFragmentMatchFromProto(p *webserverv1.LineFragmentMatch) LineFragmentMatch { return LineFragmentMatch{ LineOffset: int(p.GetLineOffset()), Offset: p.GetOffset(), @@ -235,8 +235,8 @@ func LineFragmentMatchFromProto(p *proto.LineFragmentMatch) LineFragmentMatch { } } -func (lfm *LineFragmentMatch) ToProto() *proto.LineFragmentMatch { - return &proto.LineFragmentMatch{ +func (lfm *LineFragmentMatch) ToProto() *webserverv1.LineFragmentMatch { + return &webserverv1.LineFragmentMatch{ LineOffset: int64(lfm.LineOffset), Offset: lfm.Offset, MatchLength: int64(lfm.MatchLength), @@ -244,29 +244,29 @@ func (lfm *LineFragmentMatch) ToProto() *proto.LineFragmentMatch { } } -func FlushReasonFromProto(p proto.FlushReason) FlushReason { +func FlushReasonFromProto(p webserverv1.FlushReason) FlushReason { switch p { - case proto.FlushReason_FLUSH_REASON_TIMER_EXPIRED: + case webserverv1.FlushReason_FLUSH_REASON_TIMER_EXPIRED: return FlushReasonTimerExpired - case proto.FlushReason_FLUSH_REASON_FINAL_FLUSH: + case webserverv1.FlushReason_FLUSH_REASON_FINAL_FLUSH: return FlushReasonFinalFlush - case 
proto.FlushReason_FLUSH_REASON_MAX_SIZE: + case webserverv1.FlushReason_FLUSH_REASON_MAX_SIZE: return FlushReasonMaxSize default: return FlushReason(0) } } -func (fr FlushReason) ToProto() proto.FlushReason { +func (fr FlushReason) ToProto() webserverv1.FlushReason { switch fr { case FlushReasonTimerExpired: - return proto.FlushReason_FLUSH_REASON_TIMER_EXPIRED + return webserverv1.FlushReason_FLUSH_REASON_TIMER_EXPIRED case FlushReasonFinalFlush: - return proto.FlushReason_FLUSH_REASON_FINAL_FLUSH + return webserverv1.FlushReason_FLUSH_REASON_FINAL_FLUSH case FlushReasonMaxSize: - return proto.FlushReason_FLUSH_REASON_MAX_SIZE + return webserverv1.FlushReason_FLUSH_REASON_MAX_SIZE default: - return proto.FlushReason_FLUSH_REASON_UNKNOWN_UNSPECIFIED + return webserverv1.FlushReason_FLUSH_REASON_UNKNOWN_UNSPECIFIED } } @@ -284,7 +284,7 @@ func (fr FlushReason) Generate(rand *rand.Rand, size int) reflect.Value { } } -func StatsFromProto(p *proto.Stats) Stats { +func StatsFromProto(p *webserverv1.Stats) Stats { return Stats{ ContentBytesLoaded: p.GetContentBytesLoaded(), IndexBytesLoaded: p.GetIndexBytesLoaded(), @@ -309,8 +309,8 @@ func StatsFromProto(p *proto.Stats) Stats { } } -func (s *Stats) ToProto() *proto.Stats { - return &proto.Stats{ +func (s *Stats) ToProto() *webserverv1.Stats { + return &webserverv1.Stats{ ContentBytesLoaded: s.ContentBytesLoaded, IndexBytesLoaded: s.IndexBytesLoaded, Crashes: int64(s.Crashes), @@ -334,21 +334,21 @@ func (s *Stats) ToProto() *proto.Stats { } } -func ProgressFromProto(p *proto.Progress) Progress { +func ProgressFromProto(p *webserverv1.Progress) Progress { return Progress{ Priority: p.GetPriority(), MaxPendingPriority: p.GetMaxPendingPriority(), } } -func (p *Progress) ToProto() *proto.Progress { - return &proto.Progress{ +func (p *Progress) ToProto() *webserverv1.Progress { + return &webserverv1.Progress{ Priority: p.Priority, MaxPendingPriority: p.MaxPendingPriority, } } -func SearchResultFromStreamProto(p 
*proto.StreamSearchResponse, repoURLs, lineFragments map[string]string) *SearchResult { +func SearchResultFromStreamProto(p *webserverv1.StreamSearchResponse, repoURLs, lineFragments map[string]string) *SearchResult { if p == nil { return nil } @@ -356,7 +356,7 @@ func SearchResultFromStreamProto(p *proto.StreamSearchResponse, repoURLs, lineFr return SearchResultFromProto(p.GetResponseChunk(), repoURLs, lineFragments) } -func SearchResultFromProto(p *proto.SearchResponse, repoURLs, lineFragments map[string]string) *SearchResult { +func SearchResultFromProto(p *webserverv1.SearchResponse, repoURLs, lineFragments map[string]string) *SearchResult { if p == nil { return nil } @@ -377,17 +377,17 @@ func SearchResultFromProto(p *proto.SearchResponse, repoURLs, lineFragments map[ } } -func (sr *SearchResult) ToProto() *proto.SearchResponse { +func (sr *SearchResult) ToProto() *webserverv1.SearchResponse { if sr == nil { return nil } - files := make([]*proto.FileMatch, len(sr.Files)) + files := make([]*webserverv1.FileMatch, len(sr.Files)) for i, file := range sr.Files { files[i] = file.ToProto() } - return &proto.SearchResponse{ + return &webserverv1.SearchResponse{ Stats: sr.Stats.ToProto(), Progress: sr.Progress.ToProto(), @@ -395,29 +395,29 @@ func (sr *SearchResult) ToProto() *proto.SearchResponse { } } -func (sr *SearchResult) ToStreamProto() *proto.StreamSearchResponse { +func (sr *SearchResult) ToStreamProto() *webserverv1.StreamSearchResponse { if sr == nil { return nil } - return &proto.StreamSearchResponse{ResponseChunk: sr.ToProto()} + return &webserverv1.StreamSearchResponse{ResponseChunk: sr.ToProto()} } -func RepositoryBranchFromProto(p *proto.RepositoryBranch) RepositoryBranch { +func RepositoryBranchFromProto(p *webserverv1.RepositoryBranch) RepositoryBranch { return RepositoryBranch{ Name: p.GetName(), Version: p.GetVersion(), } } -func (r *RepositoryBranch) ToProto() *proto.RepositoryBranch { - return &proto.RepositoryBranch{ +func (r *RepositoryBranch) 
ToProto() *webserverv1.RepositoryBranch { + return &webserverv1.RepositoryBranch{ Name: r.Name, Version: r.Version, } } -func RepositoryFromProto(p *proto.Repository) Repository { +func RepositoryFromProto(p *webserverv1.Repository) Repository { branches := make([]RepositoryBranch, len(p.GetBranches())) for i, branch := range p.GetBranches() { branches[i] = RepositoryBranchFromProto(branch) @@ -453,20 +453,21 @@ func RepositoryFromProto(p *proto.Repository) Repository { Tombstone: p.GetTombstone(), LatestCommitDate: p.GetLatestCommitDate().AsTime(), FileTombstones: fileTombstones, + Metadata: p.GetMetadata(), } } -func (r *Repository) ToProto() *proto.Repository { +func (r *Repository) ToProto() *webserverv1.Repository { if r == nil { return nil } - branches := make([]*proto.RepositoryBranch, len(r.Branches)) + branches := make([]*webserverv1.RepositoryBranch, len(r.Branches)) for i, branch := range r.Branches { branches[i] = branch.ToProto() } - subRepoMap := make(map[string]*proto.Repository, len(r.SubRepoMap)) + subRepoMap := make(map[string]*webserverv1.Repository, len(r.SubRepoMap)) for name, repo := range r.SubRepoMap { subRepoMap[name] = repo.ToProto() } @@ -476,7 +477,7 @@ func (r *Repository) ToProto() *proto.Repository { fileTombstones = append(fileTombstones, file) } - return &proto.Repository{ + return &webserverv1.Repository{ TenantId: int64(r.TenantID), Id: r.ID, Name: r.Name, @@ -495,10 +496,11 @@ func (r *Repository) ToProto() *proto.Repository { Tombstone: r.Tombstone, LatestCommitDate: timestamppb.New(r.LatestCommitDate), FileTombstones: fileTombstones, + Metadata: r.Metadata, } } -func IndexMetadataFromProto(p *proto.IndexMetadata) IndexMetadata { +func IndexMetadataFromProto(p *webserverv1.IndexMetadata) IndexMetadata { languageMap := make(map[string]uint16, len(p.GetLanguageMap())) for language, id := range p.GetLanguageMap() { languageMap[language] = uint16(id) @@ -516,7 +518,7 @@ func IndexMetadataFromProto(p *proto.IndexMetadata) 
IndexMetadata { } } -func (m *IndexMetadata) ToProto() *proto.IndexMetadata { +func (m *IndexMetadata) ToProto() *webserverv1.IndexMetadata { if m == nil { return nil } @@ -526,7 +528,7 @@ func (m *IndexMetadata) ToProto() *proto.IndexMetadata { languageMap[language] = uint32(id) } - return &proto.IndexMetadata{ + return &webserverv1.IndexMetadata{ IndexFormatVersion: int64(m.IndexFormatVersion), IndexFeatureVersion: int64(m.IndexFeatureVersion), IndexMinReaderVersion: int64(m.IndexMinReaderVersion), @@ -538,7 +540,7 @@ func (m *IndexMetadata) ToProto() *proto.IndexMetadata { } } -func RepoStatsFromProto(p *proto.RepoStats) RepoStats { +func RepoStatsFromProto(p *webserverv1.RepoStats) RepoStats { return RepoStats{ Repos: int(p.GetRepos()), Shards: int(p.GetShards()), @@ -551,8 +553,8 @@ func RepoStatsFromProto(p *proto.RepoStats) RepoStats { } } -func (s *RepoStats) ToProto() *proto.RepoStats { - return &proto.RepoStats{ +func (s *RepoStats) ToProto() *webserverv1.RepoStats { + return &webserverv1.RepoStats{ Repos: int64(s.Repos), Shards: int64(s.Shards), Documents: int64(s.Documents), @@ -564,7 +566,7 @@ func (s *RepoStats) ToProto() *proto.RepoStats { } } -func RepoListEntryFromProto(p *proto.RepoListEntry) *RepoListEntry { +func RepoListEntryFromProto(p *webserverv1.RepoListEntry) *RepoListEntry { if p == nil { return nil } @@ -576,19 +578,19 @@ func RepoListEntryFromProto(p *proto.RepoListEntry) *RepoListEntry { } } -func (r *RepoListEntry) ToProto() *proto.RepoListEntry { +func (r *RepoListEntry) ToProto() *webserverv1.RepoListEntry { if r == nil { return nil } - return &proto.RepoListEntry{ + return &webserverv1.RepoListEntry{ Repository: r.Repository.ToProto(), IndexMetadata: r.IndexMetadata.ToProto(), Stats: r.Stats.ToProto(), } } -func MinimalRepoListEntryFromProto(p *proto.MinimalRepoListEntry) MinimalRepoListEntry { +func MinimalRepoListEntryFromProto(p *webserverv1.MinimalRepoListEntry) MinimalRepoListEntry { branches := make([]RepositoryBranch, 
len(p.GetBranches())) for i, branch := range p.GetBranches() { branches[i] = RepositoryBranchFromProto(branch) @@ -601,19 +603,19 @@ func MinimalRepoListEntryFromProto(p *proto.MinimalRepoListEntry) MinimalRepoLis } } -func (m *MinimalRepoListEntry) ToProto() *proto.MinimalRepoListEntry { - branches := make([]*proto.RepositoryBranch, len(m.Branches)) +func (m *MinimalRepoListEntry) ToProto() *webserverv1.MinimalRepoListEntry { + branches := make([]*webserverv1.RepositoryBranch, len(m.Branches)) for i, branch := range m.Branches { branches[i] = branch.ToProto() } - return &proto.MinimalRepoListEntry{ + return &webserverv1.MinimalRepoListEntry{ HasSymbols: m.HasSymbols, Branches: branches, IndexTimeUnix: m.IndexTimeUnix, } } -func RepoListFromProto(p *proto.ListResponse) *RepoList { +func RepoListFromProto(p *webserverv1.ListResponse) *RepoList { repos := make([]*RepoListEntry, len(p.GetRepos())) for i, repo := range p.GetRepos() { repos[i] = RepoListEntryFromProto(repo) @@ -632,18 +634,18 @@ func RepoListFromProto(p *proto.ListResponse) *RepoList { } } -func (r *RepoList) ToProto() *proto.ListResponse { - repos := make([]*proto.RepoListEntry, len(r.Repos)) +func (r *RepoList) ToProto() *webserverv1.ListResponse { + repos := make([]*webserverv1.RepoListEntry, len(r.Repos)) for i, repo := range r.Repos { repos[i] = repo.ToProto() } - reposMap := make(map[uint32]*proto.MinimalRepoListEntry, len(r.ReposMap)) + reposMap := make(map[uint32]*webserverv1.MinimalRepoListEntry, len(r.ReposMap)) for id, repo := range r.ReposMap { reposMap[id] = repo.ToProto() } - return &proto.ListResponse{ + return &webserverv1.ListResponse{ Repos: repos, ReposMap: reposMap, Crashes: int64(r.Crashes), @@ -651,32 +653,32 @@ func (r *RepoList) ToProto() *proto.ListResponse { } } -func (l *ListOptions) ToProto() *proto.ListOptions { +func (l *ListOptions) ToProto() *webserverv1.ListOptions { if l == nil { return nil } - var field proto.ListOptions_RepoListField + var field 
webserverv1.ListOptions_RepoListField switch l.Field { case RepoListFieldRepos: - field = proto.ListOptions_REPO_LIST_FIELD_REPOS + field = webserverv1.ListOptions_REPO_LIST_FIELD_REPOS case RepoListFieldReposMap: - field = proto.ListOptions_REPO_LIST_FIELD_REPOS_MAP + field = webserverv1.ListOptions_REPO_LIST_FIELD_REPOS_MAP } - return &proto.ListOptions{ + return &webserverv1.ListOptions{ Field: field, } } -func ListOptionsFromProto(p *proto.ListOptions) *ListOptions { +func ListOptionsFromProto(p *webserverv1.ListOptions) *ListOptions { if p == nil { return nil } var field RepoListField switch p.GetField() { - case proto.ListOptions_REPO_LIST_FIELD_REPOS: + case webserverv1.ListOptions_REPO_LIST_FIELD_REPOS: field = RepoListFieldRepos - case proto.ListOptions_REPO_LIST_FIELD_REPOS_MAP: + case webserverv1.ListOptions_REPO_LIST_FIELD_REPOS_MAP: field = RepoListFieldReposMap } return &ListOptions{ @@ -684,7 +686,7 @@ func ListOptionsFromProto(p *proto.ListOptions) *ListOptions { } } -func SearchOptionsFromProto(p *proto.SearchOptions) *SearchOptions { +func SearchOptionsFromProto(p *webserverv1.SearchOptions) *SearchOptions { if p == nil { return nil } @@ -707,12 +709,12 @@ func SearchOptionsFromProto(p *proto.SearchOptions) *SearchOptions { } } -func (s *SearchOptions) ToProto() *proto.SearchOptions { +func (s *SearchOptions) ToProto() *webserverv1.SearchOptions { if s == nil { return nil } - return &proto.SearchOptions{ + return &webserverv1.SearchOptions{ EstimateDocCount: s.EstimateDocCount, Whole: s.Whole, ShardMaxMatchCount: int64(s.ShardMaxMatchCount), diff --git a/api_proto_test.go b/api_proto_test.go index 79d939571..98d3b11e3 100644 --- a/api_proto_test.go +++ b/api_proto_test.go @@ -25,12 +25,12 @@ import ( "testing/quick" "time" + fuzz "github.com/AdaLogics/go-fuzz-headers" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" - webproto "github.com/sourcegraph/zoekt/grpc/protos/zoekt/webserver/v1" "google.golang.org/protobuf/proto" - 
fuzz "github.com/AdaLogics/go-fuzz-headers" + webserverv1 "github.com/sourcegraph/zoekt/grpc/protos/zoekt/webserver/v1" ) func TestProtoRoundtrip(t *testing.T) { @@ -413,8 +413,8 @@ var ( exampleSearchResultBytes []byte // The proto struct representation of the search result - exampleSearchResultProto = func() *webproto.SearchResponse { - sr := new(webproto.SearchResponse) + exampleSearchResultProto = func() *webserverv1.SearchResponse { + sr := new(webserverv1.SearchResponse) err := proto.Unmarshal(exampleSearchResultBytes, sr) if err != nil { panic(err) @@ -433,7 +433,7 @@ func BenchmarkGobRoundtrip(b *testing.B) { var buf bytes.Buffer enc := gob.NewEncoder(&buf) - for i := 0; i < count; i++ { + for range count { err := enc.Encode(exampleSearchResultGo) if err != nil { panic(err) @@ -442,7 +442,7 @@ func BenchmarkGobRoundtrip(b *testing.B) { } dec := gob.NewDecoder(&buf) - for i := 0; i < count; i++ { + for range count { var res SearchResult err := dec.Decode(&res) if err != nil { @@ -459,7 +459,7 @@ func BenchmarkProtoRoundtrip(b *testing.B) { b.Run(fmt.Sprintf("count=%d", count), func(b *testing.B) { for i := 0; i < b.N; i++ { buffers := make([][]byte, 0, count) - for i := 0; i < count; i++ { + for range count { buf, err := proto.Marshal(exampleSearchResultProto) if err != nil { b.Fatal(err) @@ -468,7 +468,7 @@ func BenchmarkProtoRoundtrip(b *testing.B) { } for _, buf := range buffers { - res := new(webproto.SearchResponse) + res := new(webserverv1.SearchResponse) err := proto.Unmarshal(buf, res) if err != nil { b.Fatal(err) diff --git a/api_test.go b/api_test.go index d37daf39a..55e0be5d8 100644 --- a/api_test.go +++ b/api_test.go @@ -17,6 +17,7 @@ package zoekt // import "github.com/sourcegraph/zoekt" import ( "bytes" "encoding/gob" + "encoding/json" "reflect" "strings" "testing" @@ -63,7 +64,7 @@ func BenchmarkMinimalRepoListEncodings(b *testing.B) { b.Run("map", benchmarkEncoding(mapData)) } -func benchmarkEncoding(data interface{}) func(*testing.B) { +func 
benchmarkEncoding(data any) func(*testing.B) { return func(b *testing.B) { b.Helper() @@ -150,12 +151,6 @@ func TestMatchSize(t *testing.T) { }, { v: ChunkMatch{}, size: 120, - }, { - v: candidateMatch{}, - size: 80, - }, { - v: candidateChunk{}, - size: 40, }} for _, c := range cases { got := reflect.TypeOf(c.v).Size() @@ -175,7 +170,7 @@ func TestSearchOptions_String(t *testing.T) { opts := SearchOptions{} var fieldNames []string rv := reflect.ValueOf(&opts).Elem() - for i := 0; i < rv.NumField(); i++ { + for i := range rv.NumField() { f := rv.Field(i) name := rv.Type().Field(i).Name fieldNames = append(fieldNames, name) @@ -394,3 +389,105 @@ func TestMonthsSince1970(t *testing.T) { }) } } + +// TestRepositoryUnmarshalJSONStackOverflowFix tests that the UnmarshalJSON method +// for Repository doesn't cause a stack overflow due to infinite recursion. +// This test creates a scenario that would trigger the bug where the type alias +// was incorrectly defined as a pointer type, causing recursive calls during +// JSON unmarshaling of nested SubRepoMap structures. 
+func TestRepositoryUnmarshalJSONStackOverflowFix(t *testing.T) { + // Create a JSON with nested SubRepoMap containing Repository objects + // This specifically tests the recursive scenario that causes stack overflow + jsonData := `{ + "id": 12345, + "name": "test-repo", + "url": "https://example.com/test-repo", + "branches": [ + { + "name": "main", + "version": "abc123" + } + ], + "rawconfig": { + "repoid": "12345" + }, + "subrepomap": { + "submodule1": { + "id": 11111, + "name": "submodule1", + "url": "https://example.com/submodule1", + "rawconfig": { + "repoid": "11111" + }, + "subrepomap": { + "nested-submodule": { + "id": 33333, + "name": "nested-submodule", + "rawconfig": { + "repoid": "33333" + } + } + } + }, + "submodule2": { + "id": 22222, + "name": "submodule2", + "rawconfig": { + "repoid": "22222" + } + } + } + }` + + // Attempt to unmarshal the JSON data into a Repository struct + // This would previously cause a stack overflow due to incorrect type alias + var repo Repository + err := json.Unmarshal([]byte(jsonData), &repo) + + // Verify that unmarshaling succeeds without stack overflow + if err != nil { + t.Fatalf("Failed to unmarshal Repository JSON: %v", err) + } + + // Basic verification + if repo.ID != 12345 { + t.Errorf("Expected ID 12345, got %d", repo.ID) + } + + if repo.Name != "test-repo" { + t.Errorf("Expected Name 'test-repo', got '%s'", repo.Name) + } + + // Verify nested SubRepoMap handling doesn't cause stack overflow + if repo.SubRepoMap == nil { + t.Fatalf("Expected SubRepoMap to be non-nil") + } + + if len(repo.SubRepoMap) != 2 { + t.Fatalf("Expected 2 subrepos in SubRepoMap, got %d", len(repo.SubRepoMap)) + } + + // Verify nested repository unmarshaling worked correctly + submodule1, exists := repo.SubRepoMap["submodule1"] + if !exists { + t.Fatalf("Expected submodule1 to exist in SubRepoMap") + } + + if submodule1.ID != 11111 { + t.Errorf("Expected submodule1 ID 11111, got %d", submodule1.ID) + } + + // Verify deeply nested 
SubRepoMap works + if submodule1.SubRepoMap == nil { + t.Fatalf("Expected submodule1 SubRepoMap to be non-nil") + } + + nestedSubmodule, exists := submodule1.SubRepoMap["nested-submodule"] + if !exists { + t.Fatalf("Expected nested-submodule to exist") + } + + if nestedSubmodule.ID != 33333 { + t.Errorf("Expected nested-submodule ID 33333, got %d", nestedSubmodule.ID) + } +} diff --git a/cmd/flags.go b/cmd/flags.go index b528aee34..3e278d55d 100644 --- a/cmd/flags.go +++ b/cmd/flags.go @@ -20,23 +20,22 @@ import ( "os" "path/filepath" - "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" ) var ( version = flag.Bool("version", false, "Print version number") - opts = &build.Options{} + opts = &index.Options{} ) func init() { opts.Flags(flag.CommandLine) } -func OptionsFromFlags() *build.Options { +func OptionsFromFlags() *index.Options { if *version { name := filepath.Base(os.Args[0]) - fmt.Printf("%s version %q\n", name, zoekt.Version) + fmt.Printf("%s version %q\n", name, index.Version) os.Exit(0) } diff --git a/cmd/zoekt-archive-index/main.go b/cmd/zoekt-archive-index/main.go index 68817cf9e..999ef134f 100644 --- a/cmd/zoekt-archive-index/main.go +++ b/cmd/zoekt-archive-index/main.go @@ -1,6 +1,6 @@ -// Command zoekt-archive-index indexes an archive. +// Command zoekt-archive-index indexes a git archive. 
// -// Example via github.com: +// Examples using github.com: // // zoekt-archive-index -incremental -commit b57cb1605fd11ba2ecfa7f68992b4b9cc791934d -name github.com/gorilla/mux -strip_components 1 https://codeload.github.com/gorilla/mux/legacy.tar.gz/b57cb1605fd11ba2ecfa7f68992b4b9cc791934d // diff --git a/cmd/zoekt-dynamic-indexserver/main.go b/cmd/zoekt-dynamic-indexserver/main.go index fd9244528..e805312f0 100644 --- a/cmd/zoekt-dynamic-indexserver/main.go +++ b/cmd/zoekt-dynamic-indexserver/main.go @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -// This program manages a zoekt dynamic indexing deployment: +// Command zoekt-dynamic-indexserver starts a server to manage dynamic indexing. In contrast to +// zoekt-indexserver, it's designed for a "push-based" indexing model. The server // * listens to indexing commands // * reindexes specified repositories - package main import ( diff --git a/cmd/zoekt-git-clone/main.go b/cmd/zoekt-git-clone/main.go index d8af5bc83..4a721f329 100644 --- a/cmd/zoekt-git-clone/main.go +++ b/cmd/zoekt-git-clone/main.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// This binary fetches all repos of a user or organization and clones +// Command zoekt-git-clone fetches all repos of a user or organization and clones // them. It is strongly recommended to get a personal API token from // https://github.com/settings/tokens, save the token in a file, and // point the --token option to it. diff --git a/cmd/zoekt-git-index/main.go b/cmd/zoekt-git-index/main.go index cde395800..8797d98e0 100644 --- a/cmd/zoekt-git-index/main.go +++ b/cmd/zoekt-git-index/main.go @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +// Command zoekt-git-index indexes a single git repository. 
It works directly with git +// repositories and supports git-specific features like branches and submodules. package main import ( @@ -25,11 +27,10 @@ import ( "github.com/dustin/go-humanize" "go.uber.org/automaxprocs/maxprocs" - "github.com/sourcegraph/zoekt/internal/profiler" - "github.com/sourcegraph/zoekt/cmd" - "github.com/sourcegraph/zoekt/ctags" "github.com/sourcegraph/zoekt/gitindex" + "github.com/sourcegraph/zoekt/internal/ctags" + "github.com/sourcegraph/zoekt/internal/profiler" ) func run() int { @@ -48,7 +49,7 @@ func run() int { tenantID := flag.Int("tenant_id", 0, "tenant ID to use for indexed repositories") repoID := flag.Uint("repo_id", 0, "opaque ID to use for indexed repositories. Surfaces as `RepositoryID` in the REST search response.") - cpuProfile := flag.String("cpuprofile", "", "write cpu profile to `file`") + cpuProfile := flag.String("cpu_profile", "", "write cpu profile to `file`") flag.Parse() diff --git a/cmd/zoekt-index/main.go b/cmd/zoekt-index/main.go index 4e4ee0216..2c6647f8e 100644 --- a/cmd/zoekt-index/main.go +++ b/cmd/zoekt-index/main.go @@ -12,9 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +// Command zoekt-index indexes a directory of files. 
package main import ( + "encoding/json" "flag" "fmt" "log" @@ -23,10 +25,10 @@ import ( "runtime/pprof" "strings" - "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" - "github.com/sourcegraph/zoekt/cmd" "go.uber.org/automaxprocs/maxprocs" + + "github.com/sourcegraph/zoekt/cmd" + "github.com/sourcegraph/zoekt/index" ) type fileInfo struct { @@ -61,6 +63,7 @@ func (a *fileAggregator) add(path string, info os.FileInfo, err error) error { func main() { cpuProfile := flag.String("cpu_profile", "", "write cpu profile to file") ignoreDirs := flag.String("ignore_dirs", ".git,.hg,.svn", "comma separated list of directories to ignore.") + metaFile := flag.String("meta", "", "path to .meta JSON file with repository description") flag.Parse() if flag.NArg() == 0 { @@ -95,6 +98,18 @@ func main() { } } } + + if *metaFile != "" { + // Read and parse the .meta JSON file into opts.RepositoryDescription + data, err := os.ReadFile(*metaFile) + if err != nil { + log.Fatalf("failed to read .meta file %s: %v", *metaFile, err) + } + if err := json.Unmarshal(data, &opts.RepositoryDescription); err != nil { + log.Fatalf("failed to decode .meta file %s: %v", *metaFile, err) + } + } + for _, arg := range flag.Args() { opts.RepositoryDescription.Source = arg if err := indexArg(arg, *opts, ignoreDirMap); err != nil { @@ -103,14 +118,14 @@ func main() { } } -func indexArg(arg string, opts build.Options, ignore map[string]struct{}) error { +func indexArg(arg string, opts index.Options, ignore map[string]struct{}) error { dir, err := filepath.Abs(filepath.Clean(arg)) if err != nil { return err } opts.RepositoryDescription.Name = filepath.Base(dir) - builder, err := build.NewBuilder(opts) + builder, err := index.NewBuilder(opts) if err != nil { return err } @@ -135,9 +150,9 @@ func indexArg(arg string, opts build.Options, ignore map[string]struct{}) error for f := range comm { displayName := strings.TrimPrefix(f.name, dir+"/") if f.size > int64(opts.SizeMax) && 
!opts.IgnoreSizeMax(displayName) { - if err := builder.Add(zoekt.Document{ + if err := builder.Add(index.Document{ Name: displayName, - SkipReason: fmt.Sprintf("document size %d larger than limit %d", f.size, opts.SizeMax), + SkipReason: index.SkipReasonTooLarge, }); err != nil { return err } diff --git a/cmd/zoekt-indexserver/config.go b/cmd/zoekt-indexserver/config.go index b9f2661ef..ff484c50d 100644 --- a/cmd/zoekt-indexserver/config.go +++ b/cmd/zoekt-indexserver/config.go @@ -38,6 +38,9 @@ type ConfigEntry struct { GitilesURL string CGitURL string BitBucketServerURL string + GiteaURL string + GiteaUser string + GiteaOrg string DisableTLS bool CredentialPath string ProjectType string @@ -50,9 +53,12 @@ type ConfigEntry struct { ExcludeTopics []string Active bool NoArchived bool + KeepDeleted bool GerritFetchMetaConfig bool GerritRepoNameFormat string ExcludeUserRepos bool + Forks bool + Visibility []string } func randomize(entries []ConfigEntry) []ConfigEntry { @@ -169,7 +175,7 @@ func executeMirror(cfg []ConfigEntry, repoDir string, pendingRepos chan<- string var cmd *exec.Cmd if c.GitHubURL != "" || c.GithubUser != "" || c.GithubOrg != "" { cmd = exec.Command("zoekt-mirror-github", - "-dest", repoDir, "-delete") + "-dest", repoDir) if c.GitHubURL != "" { cmd.Args = append(cmd.Args, "-url", c.GitHubURL) } @@ -196,6 +202,15 @@ func executeMirror(cfg []ConfigEntry, repoDir string, pendingRepos chan<- string if c.NoArchived { cmd.Args = append(cmd.Args, "-no_archived") } + if !c.KeepDeleted { + cmd.Args = append(cmd.Args, "-delete") + } + if c.Forks { + cmd.Args = append(cmd.Args, "-forks") + } + for _, v := range c.Visibility { + cmd.Args = append(cmd.Args, "-visibility", v) + } } else if c.GitilesURL != "" { cmd = exec.Command("zoekt-mirror-gitiles", "-dest", repoDir, "-name", c.Name) @@ -213,7 +228,7 @@ func executeMirror(cfg []ConfigEntry, repoDir string, pendingRepos chan<- string cmd.Args = append(cmd.Args, c.CGitURL) } else if c.BitBucketServerURL != "" { 
cmd = exec.Command("zoekt-mirror-bitbucket-server", - "-dest", repoDir, "-url", c.BitBucketServerURL, "-delete") + "-dest", repoDir, "-url", c.BitBucketServerURL) if c.BitBucketServerProject != "" { cmd.Args = append(cmd.Args, "-project", c.BitBucketServerProject) } @@ -232,6 +247,9 @@ func executeMirror(cfg []ConfigEntry, repoDir string, pendingRepos chan<- string if c.CredentialPath != "" { cmd.Args = append(cmd.Args, "-credentials", c.CredentialPath) } + if !c.KeepDeleted { + cmd.Args = append(cmd.Args, "-delete") + } } else if c.GitLabURL != "" { cmd = exec.Command("zoekt-mirror-gitlab", "-dest", repoDir, "-url", c.GitLabURL) @@ -253,9 +271,12 @@ func executeMirror(cfg []ConfigEntry, repoDir string, pendingRepos chan<- string if c.NoArchived { cmd.Args = append(cmd.Args, "-no_archived") } + if !c.KeepDeleted { + cmd.Args = append(cmd.Args, "-delete") + } } else if c.GerritApiURL != "" { cmd = exec.Command("zoekt-mirror-gerrit", - "-dest", repoDir, "-delete") + "-dest", repoDir) if c.CredentialPath != "" { cmd.Args = append(cmd.Args, "-http-credentials", c.CredentialPath) } @@ -274,7 +295,44 @@ func executeMirror(cfg []ConfigEntry, repoDir string, pendingRepos chan<- string if c.GerritRepoNameFormat != "" { cmd.Args = append(cmd.Args, "-repo-name-format", c.GerritRepoNameFormat) } + if !c.KeepDeleted { + cmd.Args = append(cmd.Args, "-delete") + } cmd.Args = append(cmd.Args, c.GerritApiURL) + } else if c.GiteaURL != "" { + cmd = exec.Command("zoekt-mirror-gitea", "-dest", repoDir) + if c.GiteaURL != "" { + cmd.Args = append(cmd.Args, "-url", c.GiteaURL) + } + if c.GiteaUser != "" { + cmd.Args = append(cmd.Args, "-user", c.GiteaUser) + } else if c.GiteaOrg != "" { + cmd.Args = append(cmd.Args, "-org", c.GiteaOrg) + } + if c.Name != "" { + cmd.Args = append(cmd.Args, "-name", c.Name) + } + if c.Exclude != "" { + cmd.Args = append(cmd.Args, "-exclude", c.Exclude) + } + if c.CredentialPath != "" { + cmd.Args = append(cmd.Args, "-token", c.CredentialPath) + } + for _, 
topic := range c.Topics { + cmd.Args = append(cmd.Args, "-topic", topic) + } + for _, topic := range c.ExcludeTopics { + cmd.Args = append(cmd.Args, "-exclude_topic", topic) + } + if c.NoArchived { + cmd.Args = append(cmd.Args, "-no_archived") + } + if !c.KeepDeleted { + cmd.Args = append(cmd.Args, "-delete") + } + if c.Forks { + cmd.Args = append(cmd.Args, "-forks") + } } else { log.Printf("executeMirror: ignoring config, because it does not contain any valid repository definition: %v", c) continue diff --git a/cmd/zoekt-indexserver/main.go b/cmd/zoekt-indexserver/main.go index 2107503a6..6e54ca710 100644 --- a/cmd/zoekt-indexserver/main.go +++ b/cmd/zoekt-indexserver/main.go @@ -12,11 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// This program manages a zoekt indexing deployment: -// * recycling logs -// * periodically fetching new data. -// * periodically reindexing all git repos. - +// Command zoekt-indexserver starts a service that periodically reindexes repositories. It follows +// a "pull-based" design, where it reaches out to code hosts to fetch new data. 
package main import ( @@ -33,8 +30,8 @@ import ( "strings" "time" - "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/gitindex" + "github.com/sourcegraph/zoekt/index" ) const day = time.Hour * 24 @@ -71,10 +68,7 @@ func (o *Options) validate() { log.Fatal("cpu_fraction must be between 0.0 and 1.0") } - o.cpuCount = int(math.Trunc(float64(runtime.GOMAXPROCS(0)) * o.cpuFraction)) - if o.cpuCount < 1 { - o.cpuCount = 1 - } + o.cpuCount = max(int(math.Trunc(float64(runtime.GOMAXPROCS(0))*o.cpuFraction)), 1) if o.indexFlagsStr != "" { o.indexFlags = strings.Split(o.indexFlagsStr, " ") } @@ -206,13 +200,13 @@ func deleteIfOrphan(repoDir string, fn string) error { } defer f.Close() - ifile, err := zoekt.NewIndexFile(f) + ifile, err := index.NewIndexFile(f) if err != nil { return nil } defer ifile.Close() - repos, _, err := zoekt.ReadMetadata(ifile) + repos, _, err := index.ReadMetadata(ifile) if err != nil { return nil } diff --git a/cmd/zoekt-merge-index/main.go b/cmd/zoekt-merge-index/main.go index 899d52b93..080252e78 100644 --- a/cmd/zoekt-merge-index/main.go +++ b/cmd/zoekt-merge-index/main.go @@ -1,3 +1,16 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Command zoekt-merge-index merges a set of index shards into a compound shard. package main import ( @@ -8,13 +21,13 @@ import ( "path/filepath" "strings" - "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" ) // merge merges the input shards into a compound shard in dstDir. 
It returns the // full path to the compound shard. The input shards are removed on success. func merge(dstDir string, names []string) (string, error) { - var files []zoekt.IndexFile + var files []index.IndexFile for _, fn := range names { f, err := os.Open(fn) if err != nil { @@ -22,7 +35,7 @@ func merge(dstDir string, names []string) (string, error) { } defer f.Close() - indexFile, err := zoekt.NewIndexFile(f) + indexFile, err := index.NewIndexFile(f) if err != nil { return "", err } @@ -31,14 +44,14 @@ func merge(dstDir string, names []string) (string, error) { files = append(files, indexFile) } - tmpName, dstName, err := zoekt.Merge(dstDir, files...) + tmpName, dstName, err := index.Merge(dstDir, files...) if err != nil { return "", err } // Delete input shards. for _, name := range names { - paths, err := zoekt.IndexFilePaths(name) + paths, err := index.IndexFilePaths(name) if err != nil { return "", fmt.Errorf("zoekt-merge-index: %w", err) } @@ -74,59 +87,8 @@ func mergeCmd(paths []string) (string, error) { return merge(filepath.Dir(paths[0]), paths) } -// explode splits the input shard into individual shards and places them in dstDir. -// Temporary files created in the process are removed on a best effort basis. -func explode(dstDir string, inputShard string) error { - f, err := os.Open(inputShard) - if err != nil { - return err - } - defer f.Close() - - indexFile, err := zoekt.NewIndexFile(f) - if err != nil { - return err - } - defer indexFile.Close() - - exploded, err := zoekt.Explode(dstDir, indexFile) - defer func() { - // best effort removal of tmp files. If os.Remove fails, indexserver will delete - // the leftover tmp files during the next cleanup. - for tmpFn := range exploded { - os.Remove(tmpFn) - } - }() - if err != nil { - return fmt.Errorf("zoekt.Explode: %w", err) - } - - // remove the input shard first to avoid duplicate indexes. 
In the worst case, - // the process is interrupted just after we delete the compound shard, in which - // case we have to reindex the lost repos. - paths, err := zoekt.IndexFilePaths(inputShard) - if err != nil { - return err - } - for _, path := range paths { - err = os.Remove(path) - if err != nil { - return err - } - } - - // best effort rename shards. - for tmpFn, dstFn := range exploded { - if err := os.Rename(tmpFn, dstFn); err != nil { - log.Printf("explode: rename failed: %s", err) - } - } - - return nil -} - func explodeCmd(path string) error { - return explode(filepath.Dir(path), path) + return index.Explode(filepath.Dir(path), path) } func main() { diff --git a/cmd/zoekt-merge-index/main_test.go b/cmd/zoekt-merge-index/main_test.go index 5622cb295..44ca5a734 100644 --- a/cmd/zoekt-merge-index/main_test.go +++ b/cmd/zoekt-merge-index/main_test.go @@ -11,8 +11,9 @@ import ( "github.com/stretchr/testify/require" "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/query" - "github.com/sourcegraph/zoekt/shards" + "github.com/sourcegraph/zoekt/search" ) func TestMerge(t *testing.T) { @@ -31,7 +32,7 @@ func TestMerge(t *testing.T) { // stable require.Equal(t, filepath.Base(cs), "compound-ea9613e2ffba7d7361856aebfca75fb714856509_v17.00000.zoekt") - ss, err := shards.NewDirectorySearcher(dir) + ss, err := search.NewDirectorySearcher(dir) require.NoError(t, err) defer ss.Close() @@ -63,7 +64,7 @@ func TestExplode(t *testing.T) { cs, err := filepath.Glob(filepath.Join(dir, "compound-*.zoekt")) require.NoError(t, err) - err = explode(dir, cs[0]) + err = index.Explode(dir, cs[0]) require.NoError(t, err) cs, err = filepath.Glob(filepath.Join(dir, "compound-*.zoekt")) @@ -80,7 +81,7 @@ func TestExplode(t *testing.T) { t.Fatalf("the number of simple shards before %d and after %d should be the same", len(testShards), len(exploded)) } - ss, err := shards.NewDirectorySearcher(dir) + ss, err := search.NewDirectorySearcher(dir) 
require.NoError(t, err) defer ss.Close() diff --git a/cmd/zoekt-mirror-bitbucket-server/main.go b/cmd/zoekt-mirror-bitbucket-server/main.go index d3610fd79..f5f25c198 100644 --- a/cmd/zoekt-mirror-bitbucket-server/main.go +++ b/cmd/zoekt-mirror-bitbucket-server/main.go @@ -10,9 +10,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// This binary fetches all repos of a project, and of a specific type, in case -// these are specified, and clones them. By default it fetches and clones all -// existing repos. +// Command zoekt-mirror-bitbucket-server fetches all repos of a bitbucket project, +// optionally of a specific type, and clones them. package main import ( @@ -180,7 +179,7 @@ func IsValidProjectType(projectType string) bool { func getAllRepos(client bitbucketv1.APIClient) ([]bitbucketv1.Repository, error) { var allRepos []bitbucketv1.Repository - opts := map[string]interface{}{ + opts := map[string]any{ "limit": 1000, "start": 0, } @@ -209,7 +208,7 @@ func getAllRepos(client bitbucketv1.APIClient) ([]bitbucketv1.Repository, error) func getProjectRepos(client bitbucketv1.APIClient, projectName string) ([]bitbucketv1.Repository, error) { var allRepos []bitbucketv1.Repository - opts := map[string]interface{}{ + opts := map[string]any{ "limit": 1000, "start": 0, } diff --git a/cmd/zoekt-mirror-gerrit/main.go b/cmd/zoekt-mirror-gerrit/main.go index 2336723b1..71d45c812 100644 --- a/cmd/zoekt-mirror-gerrit/main.go +++ b/cmd/zoekt-mirror-gerrit/main.go @@ -12,8 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// This binary fetches all repos of a Gerrit host. - +// Command zoekt-mirror-gerrit fetches all repos of a Gerrit host. 
package main import ( @@ -34,6 +33,7 @@ import ( gerrit "github.com/andygrunwald/go-gerrit" git "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/config" + "github.com/sourcegraph/zoekt/gitindex" ) @@ -147,8 +147,6 @@ func main() { projectURL = schemeInfo.URL if s == "http" && schemeInfo.IsAuthRequired { projectURL = addPassword(projectURL, rootURL.User) - // remove "/a/" prefix needed for API call with basic auth but not with git command → cleaner repo name - projectURL = strings.Replace(projectURL, "/a/${project}", "/${project}", 1) } break } diff --git a/cmd/zoekt-mirror-gitea/main.go b/cmd/zoekt-mirror-gitea/main.go index 6918ccdcc..0f548bb31 100644 --- a/cmd/zoekt-mirror-gitea/main.go +++ b/cmd/zoekt-mirror-gitea/main.go @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -// This binary fetches all repos of a user or organization and clones -// them. It is strongly recommended to get a personal API token from -// https://gitea.com/user/settings/applications, save the token in a -// file, and point the --token option to it. +// Command zoekt-mirror-gitea fetches all repos of a gitea user or organization +// and clones them. It is strongly recommended to get a personal API token from +// https://gitea.com/user/settings/applications, save the token in a file, and point +// the --token option to it. package main import ( @@ -89,7 +89,10 @@ func main() { if err != nil { log.Fatal(err) } - clientOptions = append(clientOptions, gitea.SetToken(string(content))) + contentStr := string(content) + // Editors tend to insert newlines that make the token invalid, so clean it up + contentStr = strings.TrimSpace(contentStr) + clientOptions = append(clientOptions, gitea.SetToken(contentStr)) } client, err := gitea.NewClient(*giteaURL, clientOptions...) 
if err != nil { @@ -109,7 +112,7 @@ func main() { repos, err = getUserRepos(client, *user, reposFilters) default: log.Printf("no user or org specified, cloning all repos.") - repos, err = getUserRepos(client, "", reposFilters) + repos, err = getAllRepos(client, reposFilters) } if err != nil { @@ -193,17 +196,8 @@ func filterRepositories(repos []*gitea.Repository, noArchived bool) (filteredRep return } -func getOrgRepos(client *gitea.Client, org string, reposFilters reposFilters) ([]*gitea.Repository, error) { +func searchRepos(client *gitea.Client, searchOptions *gitea.SearchRepoOptions, reposFilters reposFilters) ([]*gitea.Repository, error) { var allRepos []*gitea.Repository - searchOptions := &gitea.SearchRepoOptions{} - // OwnerID - organization, _, err := client.GetOrg(org) - if err != nil { - return nil, err - } - - searchOptions.OwnerID = organization.ID - for { repos, resp, err := client.SearchRepos(*searchOptions) if err != nil { @@ -213,40 +207,43 @@ func getOrgRepos(client *gitea.Client, org string, reposFilters reposFilters) ([ break } - searchOptions.Page = resp.NextPage repos = filterRepositories(repos, *reposFilters.noArchived) allRepos = append(allRepos, repos...) 
+ searchOptions.Page = resp.NextPage if resp.NextPage == 0 { break } } + return allRepos, nil } +func getOrgRepos(client *gitea.Client, org string, reposFilters reposFilters) ([]*gitea.Repository, error) { + searchOptions := &gitea.SearchRepoOptions{} + // OwnerID + organization, _, err := client.GetOrg(org) + if err != nil { + return nil, err + } + + searchOptions.OwnerID = organization.ID + + return searchRepos(client, searchOptions, reposFilters) +} + +func getAllRepos(client *gitea.Client, reposFilters reposFilters) ([]*gitea.Repository, error) { + return searchRepos(client, &gitea.SearchRepoOptions{}, reposFilters) +} + func getUserRepos(client *gitea.Client, user string, reposFilters reposFilters) ([]*gitea.Repository, error) { - var allRepos []*gitea.Repository searchOptions := &gitea.SearchRepoOptions{} u, _, err := client.GetUserInfo(user) if err != nil { return nil, err } searchOptions.OwnerID = u.ID - for { - repos, resp, err := client.SearchRepos(*searchOptions) - if err != nil { - return nil, err - } - if len(repos) == 0 { - break - } - repos = filterRepositories(repos, *reposFilters.noArchived) - allRepos = append(allRepos, repos...) - searchOptions.Page = resp.NextPage - if resp.NextPage == 0 { - break - } - } - return allRepos, nil + + return searchRepos(client, searchOptions, reposFilters) } func cloneRepos(destDir string, repos []*gitea.Repository) error { diff --git a/cmd/zoekt-mirror-github/main.go b/cmd/zoekt-mirror-github/main.go index b14842024..99dd90439 100644 --- a/cmd/zoekt-mirror-github/main.go +++ b/cmd/zoekt-mirror-github/main.go @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -// This binary fetches all repos of a user or organization and clones -// them. It is strongly recommended to get a personal API token from -// https://github.com/settings/tokens, save the token in a file, and -// point the --token option to it. 
+// Command zoekt-mirror-github fetches all repos of a github user or organization +// and clones them. It is strongly recommended to get a personal API token from +// https://github.com/settings/tokens, save the token in a file, and point the +// --token option to it. package main import ( @@ -23,13 +23,14 @@ import ( "flag" "fmt" "log" + "net/http" "net/url" "os" "path/filepath" "strconv" "strings" - "github.com/google/go-github/v27/github" + "github.com/google/go-github/v78/github" "golang.org/x/oauth2" "github.com/sourcegraph/zoekt/gitindex" @@ -50,6 +51,7 @@ type reposFilters struct { topics []string excludeTopics []string noArchived *bool + visibility []string } func main() { @@ -57,9 +59,7 @@ func main() { githubURL := flag.String("url", "", "GitHub Enterprise url. If not set github.com will be used as the host.") org := flag.String("org", "", "organization to mirror") user := flag.String("user", "", "user to mirror") - token := flag.String("token", - filepath.Join(os.Getenv("HOME"), ".github-token"), - "file holding API token.") + token := flag.String("token", "", "file holding API token. If not set defaults to $HOME/.github-token if present, else uses unauthenticated GitHub client.") forks := flag.Bool("forks", false, "also mirror forks.") deleteRepos := flag.Bool("delete", false, "delete missing repos") namePattern := flag.String("name", "", "only clone repos whose name matches the given regexp.") @@ -69,6 +69,8 @@ func main() { excludeTopics := topicsFlag{} flag.Var(&excludeTopics, "exclude_topic", "don't clone repos whose have one of given topics. You can add multiple topics by setting this more than once.") noArchived := flag.Bool("no_archived", false, "mirror only projects that are not archived") + visibility := topicsFlag{} + flag.Var(&visibility, "visibility", "filter repos by visibility (public, private, internal). 
You can add multiple values by setting this more than once.") flag.Parse() @@ -80,8 +82,8 @@ func main() { } var host string - var apiBaseURL string var client *github.Client + tc := newOAuthClient(token) if *githubURL != "" { rootURL, err := url.Parse(*githubURL) if err != nil { @@ -92,46 +94,25 @@ func main() { if err != nil { log.Fatal(err) } - apiBaseURL = rootURL.ResolveReference(apiPath).String() - client, err = github.NewEnterpriseClient(apiBaseURL, apiBaseURL, nil) + apiBaseURL := rootURL.ResolveReference(apiPath).String() + client, err = github.NewEnterpriseClient(apiBaseURL, apiBaseURL, tc) if err != nil { log.Fatal(err) } } else { host = "github.com" - apiBaseURL = "https://github.com/" - client = github.NewClient(nil) + client = github.NewClient(tc) } destDir := filepath.Join(*dest, host) if err := os.MkdirAll(destDir, 0o755); err != nil { log.Fatal(err) } - if *token != "" { - content, err := os.ReadFile(*token) - if err != nil { - log.Fatal(err) - } - - ts := oauth2.StaticTokenSource( - &oauth2.Token{ - AccessToken: strings.TrimSpace(string(content)), - }) - tc := oauth2.NewClient(context.Background(), ts) - if *githubURL != "" { - client, err = github.NewEnterpriseClient(apiBaseURL, apiBaseURL, tc) - if err != nil { - log.Fatal(err) - } - } else { - client = github.NewClient(tc) - } - } - reposFilters := reposFilters{ topics: topics, excludeTopics: excludeTopics, noArchived: noArchived, + visibility: visibility, } var repos []*github.Repository var err error @@ -184,6 +165,32 @@ func main() { } } +func newOAuthClient(token *string) *http.Client { + var content []byte + var err error + + if *token != "" { // user explicitly provided a token which must exist + content, err = os.ReadFile(*token) + if err != nil { + log.Fatal(err) + } + } else { + defaultToken := filepath.Join(os.Getenv("HOME"), ".github-token") + content, err = os.ReadFile(defaultToken) + if err != nil && os.IsNotExist(err) { // use unauthenticated client + return nil + } else if err != 
nil { + log.Fatal(err) + } + } + + ts := oauth2.StaticTokenSource( + &oauth2.Token{ + AccessToken: strings.TrimSpace(string(content)), + }) + return oauth2.NewClient(context.Background(), ts) +} + func deleteStaleRepos(destDir string, filter *gitindex.Filter, repos []*github.Repository, user string) error { var baseURL string if len(repos) > 0 { @@ -225,11 +232,14 @@ func hasIntersection(s1, s2 []string) bool { return false } -func filterRepositories(repos []*github.Repository, include []string, exclude []string, noArchived bool) (filteredRepos []*github.Repository) { +func filterRepositories(repos []*github.Repository, include []string, exclude []string, noArchived bool, visibility []string) (filteredRepos []*github.Repository) { for _, repo := range repos { if noArchived && *repo.Archived { continue } + if len(visibility) > 0 && !hasIntersection(visibility, []string{repo.GetVisibility()}) { + continue + } if (len(include) == 0 || hasIntersection(include, repo.Topics)) && !hasIntersection(exclude, repo.Topics) { filteredRepos = append(filteredRepos, repo) @@ -251,7 +261,7 @@ func getOrgRepos(client *github.Client, org string, reposFilters reposFilters) ( } opt.Page = resp.NextPage - repos = filterRepositories(repos, reposFilters.topics, reposFilters.excludeTopics, *reposFilters.noArchived) + repos = filterRepositories(repos, reposFilters.topics, reposFilters.excludeTopics, *reposFilters.noArchived, reposFilters.visibility) allRepos = append(allRepos, repos...) if resp.NextPage == 0 { break @@ -273,7 +283,7 @@ func getUserRepos(client *github.Client, user string, reposFilters reposFilters) } opt.Page = resp.NextPage - repos = filterRepositories(repos, reposFilters.topics, reposFilters.excludeTopics, *reposFilters.noArchived) + repos = filterRepositories(repos, reposFilters.topics, reposFilters.excludeTopics, *reposFilters.noArchived, reposFilters.visibility) allRepos = append(allRepos, repos...) 
if resp.NextPage == 0 { break @@ -282,11 +292,10 @@ func getUserRepos(client *github.Client, user string, reposFilters reposFilters) return allRepos, nil } -func itoa(p *int) string { - if p != nil { - return strconv.Itoa(*p) +func setOptionalIntConfig(config map[string]string, key string, value *int) { + if value != nil { + config[key] = strconv.Itoa(*value) } - return "" } func cloneRepos(destDir string, repos []*github.Repository) error { @@ -301,15 +310,15 @@ func cloneRepos(destDir string, repos []*github.Repository) error { "zoekt.web-url": *r.HTMLURL, "zoekt.name": filepath.Join(host.Hostname(), *r.FullName), - "zoekt.github-stars": itoa(r.StargazersCount), - "zoekt.github-watchers": itoa(r.WatchersCount), - "zoekt.github-subscribers": itoa(r.SubscribersCount), - "zoekt.github-forks": itoa(r.ForksCount), - "zoekt.archived": marshalBool(r.Archived != nil && *r.Archived), "zoekt.fork": marshalBool(r.Fork != nil && *r.Fork), "zoekt.public": marshalBool(r.Private == nil || !*r.Private), } + setOptionalIntConfig(config, "zoekt.github-stars", r.StargazersCount) + setOptionalIntConfig(config, "zoekt.github-watchers", r.WatchersCount) + setOptionalIntConfig(config, "zoekt.github-subscribers", r.SubscribersCount) + setOptionalIntConfig(config, "zoekt.github-forks", r.ForksCount) + dest, err := gitindex.CloneRepo(destDir, *r.FullName, *r.CloneURL, config) if err != nil { return err diff --git a/cmd/zoekt-mirror-gitiles/main.go b/cmd/zoekt-mirror-gitiles/main.go index e05f6b40a..4625b22fe 100644 --- a/cmd/zoekt-mirror-gitiles/main.go +++ b/cmd/zoekt-mirror-gitiles/main.go @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// This binary fetches all repos of a Gitiles host. It does double -// duty for other "simple" web hosts +// Command zoekt-mirror-gitiles fetches all repos of a Gitiles host. +// It does double duty for other "simple" web hosts. 
package main import ( diff --git a/cmd/zoekt-mirror-gitlab/main.go b/cmd/zoekt-mirror-gitlab/main.go index 55d3fee07..5937db36a 100644 --- a/cmd/zoekt-mirror-gitlab/main.go +++ b/cmd/zoekt-mirror-gitlab/main.go @@ -10,13 +10,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -// This binary fetches all repos for a user from gitlab. +// Command zoekt-mirror-gitlab fetches all repos for a user from gitlab. // // It is recommended to use a gitlab personal access token: // https://docs.gitlab.com/ce/user/profile/personal_access_tokens.html. This // token should be stored in a file and the --token option should be used. // In addition, the token should be present in the ~/.netrc of the user running -// the mirror command. For example, the ~/.netrc may look like: +// the mirror command. For example, the ~/.netrc may look like: // // machine gitlab.com // login oauth @@ -34,8 +34,9 @@ import ( "strings" "time" + gitlab "gitlab.com/gitlab-org/api/client-go" + "github.com/sourcegraph/zoekt/gitindex" - gitlab "github.com/xanzy/go-gitlab" ) func main() { @@ -86,12 +87,12 @@ func main() { ListOptions: gitlab.ListOptions{ PerPage: 100, }, - Sort: gitlab.String("asc"), - OrderBy: gitlab.String("id"), + Sort: gitlab.Ptr("asc"), + OrderBy: gitlab.Ptr("id"), Membership: isMember, } if *isPublic { - opt.Visibility = gitlab.Visibility(gitlab.PublicVisibility) + opt.Visibility = gitlab.Ptr(gitlab.PublicVisibility) } if *lastActivityAfter != "" { @@ -99,11 +100,11 @@ func main() { if err != nil { log.Fatal(err) } - opt.LastActivityAfter = gitlab.Time(targetDate) + opt.LastActivityAfter = gitlab.Ptr(targetDate) } if *noArchived { - opt.Archived = gitlab.Bool(false) + opt.Archived = gitlab.Ptr(false) } var gitlabProjects []*gitlab.Project @@ -192,8 +193,8 @@ func fetchProjects(destDir, token string, projects []*gitlab.Project) { "zoekt.web-url": p.WebURL, "zoekt.name": filepath.Join(u.Hostname(), p.PathWithNamespace), -
"zoekt.gitlab-stars": strconv.Itoa(p.StarCount), - "zoekt.gitlab-forks": strconv.Itoa(p.ForksCount), + "zoekt.gitlab-stars": strconv.FormatInt(p.StarCount, 10), + "zoekt.gitlab-forks": strconv.FormatInt(p.ForksCount, 10), "zoekt.archived": marshalBool(p.Archived), "zoekt.fork": marshalBool(p.ForkedFromProject != nil), diff --git a/cmd/zoekt-repo-index/main.go b/cmd/zoekt-repo-index/main.go index 625cb3180..863b72ebf 100644 --- a/cmd/zoekt-repo-index/main.go +++ b/cmd/zoekt-repo-index/main.go @@ -12,9 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -// zoekt-repo-index indexes a repo-based repository. The constituent git +// Command zoekt-repo-index indexes repository that uses the Android 'repo' +// tool (https://android.googlesource.com/tools/repo). The constituent git // repositories should already have been downloaded to the --repo_cache -// directory, eg. +// directory, for example: // // go install github.com/sourcegraph/zoekt/cmd/zoekt-repo-index && // @@ -39,15 +40,15 @@ import ( "sort" "strings" + git "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" "github.com/google/slothfs/manifest" + "go.uber.org/automaxprocs/maxprocs" + "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" "github.com/sourcegraph/zoekt/gitindex" "github.com/sourcegraph/zoekt/ignore" - "go.uber.org/automaxprocs/maxprocs" - - git "github.com/go-git/go-git/v5" - "github.com/go-git/go-git/v5/plumbing" + "github.com/sourcegraph/zoekt/index" ) var _ = log.Println @@ -127,7 +128,7 @@ func main() { revPrefix := flag.String("rev_prefix", "refs/remotes/origin/", "prefix for references") baseURLStr := flag.String("base_url", "", "base url to interpret repository names") repoCacheDir := flag.String("repo_cache", "", "root for repository cache") - indexDir := flag.String("index", build.DefaultDir, "index directory for *.zoekt files") + indexDir := flag.String("index", index.DefaultDir, "index 
directory for *.zoekt files") manifestRepoURL := flag.String("manifest_repo_url", "", "set a URL for a git repository holding manifest XML file. Provide the BRANCH:XML-FILE as further command-line arguments") manifestRevPrefix := flag.String("manifest_rev_prefix", "refs/remotes/origin/", "prefixes for branches in manifest repository") repoName := flag.String("name", "", "set repository name") @@ -150,7 +151,7 @@ func main() { *repoName = filepath.Join(u.Host, u.Path) } - opts := build.Options{ + opts := index.Options{ Parallelism: *parallelism, SizeMax: *sizeMax, ShardMax: *shardLimit, @@ -258,7 +259,7 @@ func main() { return } - builder, err := build.NewBuilder(opts) + builder, err := index.NewBuilder(opts) if err != nil { log.Fatal(err) } @@ -269,7 +270,7 @@ func main() { log.Fatal(err) } - doc := zoekt.Document{ + doc := index.Document{ Name: k.FullPath(), Content: data, SubRepositoryPath: k.SubRepoPath, diff --git a/cmd/zoekt-sourcegraph-indexserver/backoff_test.go b/cmd/zoekt-sourcegraph-indexserver/backoff_test.go index b24e4b01b..a425c6de2 100644 --- a/cmd/zoekt-sourcegraph-indexserver/backoff_test.go +++ b/cmd/zoekt-sourcegraph-indexserver/backoff_test.go @@ -95,7 +95,7 @@ func TestQueue_ResetFailuresCount(t *testing.T) { emptyQueue(queue) // consecutive failures will push backoff until to a further out time - for i := 0; i < 1000; i++ { + for range 1000 { queue.SetIndexed(opts, indexStateFail) } @@ -134,7 +134,7 @@ func TestQueue_MaxBackoffDuration(t *testing.T) { emptyQueue(queue) // consecutive failures increase duration up to a maximum - for i := 0; i < 100; i++ { + for range 100 { queue.SetIndexed(opts, indexStateFail) } @@ -309,7 +309,7 @@ func TestBackoff_IncrementConsecutiveFailures(t *testing.T) { now := time.Now() expectedFailuresCount := 0 - for i := 0; i < failedCount; i++ { + for i := range failedCount { backoff.Fail(now.Add(time.Duration(i)*backoffDuration), logtest.Scoped(t), opts) expectedFailuresCount++ assertFailuresCount(t, 
expectedFailuresCount, backoff) @@ -332,7 +332,7 @@ func TestBackoff_MaximumConsecutiveFailures(t *testing.T) { expectedFailuresCount := 0 // consecutive failures count increments per failure - for i := 0; i < maximumCount; i++ { + for i := range maximumCount { backoff.Fail(now.Add(time.Duration(i)*backoffDuration), logtest.Scoped(t), opts) expectedFailuresCount++ assertFailuresCount(t, expectedFailuresCount, backoff) @@ -356,7 +356,7 @@ func TestBackoff_ResetConsecutiveFailures(t *testing.T) { maxBackoff: maxBackoffDuration, } - for i := 0; i < failedCount; i++ { + for i := range failedCount { now := time.Now() // fail j consecutive times diff --git a/cmd/zoekt-sourcegraph-indexserver/cleanup.go b/cmd/zoekt-sourcegraph-indexserver/cleanup.go index 27097cac8..5a283ffd6 100644 --- a/cmd/zoekt-sourcegraph-indexserver/cleanup.go +++ b/cmd/zoekt-sourcegraph-indexserver/cleanup.go @@ -14,6 +14,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" ) var metricCleanupDuration = promauto.NewHistogram(prometheus.HistogramOpts{ @@ -35,7 +36,7 @@ func cleanup(indexDir string, repos []uint32, now time.Time, shardMerging bool) trash := getShards(trashDir) tombtones := getTombstonedRepos(indexDir) - index := getShards(indexDir) + indexShards := getShards(indexDir) // trash: Remove old shards and conflicts with index minAge := now.Add(-24 * time.Hour) @@ -50,7 +51,7 @@ func cleanup(indexDir string, repos []uint32, now time.Time, shardMerging bool) } } - if _, conflicts := index[repo]; !conflicts && !old { + if _, conflicts := indexShards[repo]; !conflicts && !old { continue } @@ -62,7 +63,7 @@ func cleanup(indexDir string, repos []uint32, now time.Time, shardMerging bool) // tombstones: Remove tombstones that conflict with index or trash. After this, // tombstones only contain repos that are neither in the trash nor in the index. 
for repo := range tombtones { - if _, conflicts := index[repo]; conflicts { + if _, conflicts := indexShards[repo]; conflicts { delete(tombtones, repo) } // Trash takes precedence over tombstones. @@ -75,13 +76,13 @@ func cleanup(indexDir string, repos []uint32, now time.Time, shardMerging bool) // shards that have the same ID but different names delete and start over. // This can happen when a repository is renamed. In future we should make // shard file names based on ID. - for repo, shards := range index { + for repo, shards := range indexShards { if consistentRepoName(shards) { continue } // prevent further processing since we will delete - delete(index, repo) + delete(indexShards, repo) // This should be rare, so give an informative log message. var paths []string @@ -113,7 +114,7 @@ func cleanup(indexDir string, repos []uint32, now time.Time, shardMerging bool) for _, repo := range repos { // Delete from index so that index will only contain shards to be // trashed. - delete(index, repo) + delete(indexShards, repo) if shards, ok := trash[repo]; ok { infoLog.Printf("restoring shards from trash for %v", repo) @@ -123,7 +124,7 @@ func cleanup(indexDir string, repos []uint32, now time.Time, shardMerging bool) if s, ok := tombtones[repo]; ok { infoLog.Printf("removing tombstone for %v", repo) - err := zoekt.UnsetTombstone(s.Path, repo) + err := index.UnsetTombstone(s.Path, repo) if err != nil { errorLog.Printf("error removing tombstone for %v: %s", repo, err) } @@ -131,7 +132,7 @@ func cleanup(indexDir string, repos []uint32, now time.Time, shardMerging bool) } // index: Move non-existent repos into trash - for repo, shards := range index { + for repo, shards := range indexShards { // Best-effort touch. If touch fails, we will just remove from the // trash sooner. 
for _, shard := range shards { @@ -197,7 +198,7 @@ func getShards(dir string) map[uint32][]shard { continue } - repos, _, err := zoekt.ReadMetadataPathAlive(path) + repos, _, err := index.ReadMetadataPathAlive(path) if err != nil { debugLog.Printf("failed to read shard: %v", err) continue @@ -231,7 +232,7 @@ func getTombstonedRepos(dir string) map[uint32]shard { m := make(map[uint32]shard) for _, p := range paths { - repos, _, err := zoekt.ReadMetadataPath(p) + repos, _, err := index.ReadMetadataPath(p) if err != nil { continue } @@ -285,7 +286,7 @@ func removeAll(shards ...shard) { // exceedingly rare due to it being a mix of partial failure on something in // trash + an admin re-adding a repository. for _, shard := range shards { - paths, err := zoekt.IndexFilePaths(shard.Path) + paths, err := index.IndexFilePaths(shard.Path) if err != nil { debugLog.Printf("failed to remove shard %s: %v", shard.Path, err) } @@ -299,7 +300,7 @@ func removeAll(shards ...shard) { func moveAll(dstDir string, shards []shard) { for i, shard := range shards { - paths, err := zoekt.IndexFilePaths(shard.Path) + paths, err := index.IndexFilePaths(shard.Path) if err != nil { errorLog.Printf("failed to stat shard paths, deleting all shards for %s: %v", shard.RepoName, err) removeAll(shards...) @@ -367,7 +368,7 @@ func maybeSetTombstone(shards []shard, repoID uint32) bool { return false } - if err := zoekt.SetTombstone(shards[0].Path, repoID); err != nil { + if err := index.SetTombstone(shards[0].Path, repoID); err != nil { errorLog.Printf("error setting tombstone for %d in shard %s: %s. 
Removing shard\n", repoID, shards[0].Path, err) _ = os.Remove(shards[0].Path) } @@ -379,6 +380,11 @@ var metricVacuumRunning = promauto.NewGauge(prometheus.GaugeOpts{ Help: "Set to 1 if indexserver's vacuum job is running.", }) +var metricNumberShards = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "index_number_shards", + Help: "The number of total shards.", +}) + var metricNumberCompoundShards = promauto.NewGauge(prometheus.GaugeOpts{ Name: "index_number_compound_shards", Help: "The number of compound shards.", @@ -449,7 +455,7 @@ func removeTombstones(fn string) ([]*zoekt.Repository, error) { runMerge = exec.Command("zoekt-merge-index", "merge", fn).Run } - repos, _, err := zoekt.ReadMetadataPath(fn) + repos, _, err := index.ReadMetadataPath(fn) if err != nil { return nil, fmt.Errorf("zoekt.ReadMetadataPath: %s", err) } @@ -465,7 +471,7 @@ func removeTombstones(fn string) ([]*zoekt.Repository, error) { } defer func() { - paths, err := zoekt.IndexFilePaths(fn) + paths, err := index.IndexFilePaths(fn) if err != nil { return } diff --git a/cmd/zoekt-sourcegraph-indexserver/cleanup_test.go b/cmd/zoekt-sourcegraph-indexserver/cleanup_test.go index 5a1fa7666..ca15ad837 100644 --- a/cmd/zoekt-sourcegraph-indexserver/cleanup_test.go +++ b/cmd/zoekt-sourcegraph-indexserver/cleanup_test.go @@ -14,7 +14,7 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" ) func TestCleanup(t *testing.T) { @@ -22,7 +22,7 @@ func TestCleanup(t *testing.T) { return shard{ RepoID: fakeID(name), RepoName: name, - Path: zoekt.ShardName("", name, 15, n), + Path: fmt.Sprintf("%s_v%d.%05d.zoekt", name, 15, n), ModTime: mtime, RepoTombstone: false, } @@ -37,7 +37,7 @@ func TestCleanup(t *testing.T) { if filepath.Ext(path) != ".zoekt" { continue } - repos, _, _ := zoekt.ReadMetadataPathAlive(path) + repos, _, _ := index.ReadMetadataPathAlive(path) fi, _ := os.Stat(path) for _, repo := range 
repos { shards = append(shards, shard{ @@ -173,7 +173,7 @@ func createTestShard(t *testing.T, repo string, id uint32, path string, optFns . for _, optFn := range optFns { optFn(r) } - b, err := zoekt.NewIndexBuilder(r) + b, err := index.NewShardBuilder(r) if err != nil { t.Fatal(err) } @@ -234,7 +234,7 @@ func TestVacuum(t *testing.T) { tmpDir := t.TempDir() fn := createCompoundShard(t, tmpDir, []uint32{1, 2, 3, 4}) - err := zoekt.SetTombstone(fn, 2) + err := index.SetTombstone(fn, 2) if err != nil { t.Fatal(err) } @@ -258,7 +258,7 @@ func TestVacuum(t *testing.T) { t.Fatalf("expected 1 shard, but instead got %d", len(shards)) } - repos, _, err := zoekt.ReadMetadataPath(shards[0]) + repos, _, err := index.ReadMetadataPath(shards[0]) if err != nil { t.Fatal(err) } @@ -286,13 +286,13 @@ func TestGetTombstonedRepos(t *testing.T) { dir := t.TempDir() var repoID uint32 = 2 csOld := createCompoundShard(t, dir, []uint32{1, 2, 3, 4}, setLastCommitDate(time.Now().Add(-1*time.Hour))) - if err := zoekt.SetTombstone(csOld, repoID); err != nil { + if err := index.SetTombstone(csOld, repoID); err != nil { t.Fatal(err) } now := time.Now() csNew := createCompoundShard(t, dir, []uint32{5, 2, 6, 7}, setLastCommitDate(now)) - if err := zoekt.SetTombstone(csNew, repoID); err != nil { + if err := index.SetTombstone(csNew, repoID); err != nil { t.Fatal(err) } @@ -380,7 +380,7 @@ func TestCleanupCompoundShards(t *testing.T) { setTombstone := func(shardPath string, repoID uint32) { t.Helper() - if err := zoekt.SetTombstone(shardPath, repoID); err != nil { + if err := index.SetTombstone(shardPath, repoID); err != nil { t.Fatal(err) } } @@ -473,12 +473,12 @@ func createCompoundShard(t *testing.T, dir string, ids []uint32, optFns ...func( optsFn(&repo) } - opts := build.Options{ + opts := index.Options{ IndexDir: dir, RepositoryDescription: repo, } opts.SetDefaults() - b, err := build.NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } @@ 
-517,12 +517,12 @@ func mergeHelper(t *testing.T, fn string) error { } defer f.Close() - indexFile, err := zoekt.NewIndexFile(f) + indexFile, err := index.NewIndexFile(f) if err != nil { return fmt.Errorf("zoekt.NewIndexFile: %s ", err) } defer indexFile.Close() - _, _, err = zoekt.Merge(filepath.Dir(fn), indexFile) + _, _, err = index.Merge(filepath.Dir(fn), indexFile) return err } diff --git a/cmd/zoekt-sourcegraph-indexserver/debug.go b/cmd/zoekt-sourcegraph-indexserver/debug.go index 302f910c0..c905cdf4e 100644 --- a/cmd/zoekt-sourcegraph-indexserver/debug.go +++ b/cmd/zoekt-sourcegraph-indexserver/debug.go @@ -29,11 +29,11 @@ func debugIndex() *ffcli.Command { if err != nil { return err } - id, err := strconv.Atoi(args[0]) + id, err := strconv.ParseUint(args[0], 10, 32) if err != nil { return err } - msg, err := s.forceIndex(uint32(id)) + msg, err := s.forceIndex(ctx, uint32(id)) infoLog.Println(msg) if err != nil { return err diff --git a/cmd/zoekt-sourcegraph-indexserver/grpc/protos/README.md b/cmd/zoekt-sourcegraph-indexserver/grpc/protos/README.md new file mode 100644 index 000000000..ee0e6e3b6 --- /dev/null +++ b/cmd/zoekt-sourcegraph-indexserver/grpc/protos/README.md @@ -0,0 +1,11 @@ +# Sourcegraph indexserver protobuf definitions + +This directory contains protobuf definitions for the indexserver gRPC API. + +To generate the Go code, run this script from the repository root: + +```sh +./gen-proto.sh +``` + +Note: this script will regenerate all protos in the project, not just the ones in this directory. 
diff --git a/cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1/buf.gen.yaml b/cmd/zoekt-sourcegraph-indexserver/grpc/protos/buf.gen.yaml similarity index 100% rename from cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1/buf.gen.yaml rename to cmd/zoekt-sourcegraph-indexserver/grpc/protos/buf.gen.yaml diff --git a/cmd/zoekt-sourcegraph-indexserver/protos/buf.yaml b/cmd/zoekt-sourcegraph-indexserver/grpc/protos/buf.yaml similarity index 100% rename from cmd/zoekt-sourcegraph-indexserver/protos/buf.yaml rename to cmd/zoekt-sourcegraph-indexserver/grpc/protos/buf.yaml diff --git a/cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1/configuration.pb.go b/cmd/zoekt-sourcegraph-indexserver/grpc/protos/sourcegraph/zoekt/configuration/v1/configuration.pb.go similarity index 59% rename from cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1/configuration.pb.go rename to cmd/zoekt-sourcegraph-indexserver/grpc/protos/sourcegraph/zoekt/configuration/v1/configuration.pb.go index 1c09653a0..f8e3939c8 100644 --- a/cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1/configuration.pb.go +++ b/cmd/zoekt-sourcegraph-indexserver/grpc/protos/sourcegraph/zoekt/configuration/v1/configuration.pb.go @@ -2,7 +2,7 @@ // versions: // protoc-gen-go v1.28.1 // protoc (unknown) -// source: configuration.proto +// source: sourcegraph/zoekt/configuration/v1/configuration.proto package v1 @@ -57,11 +57,11 @@ func (x CTagsParserType) String() string { } func (CTagsParserType) Descriptor() protoreflect.EnumDescriptor { - return file_configuration_proto_enumTypes[0].Descriptor() + return file_sourcegraph_zoekt_configuration_v1_configuration_proto_enumTypes[0].Descriptor() } func (CTagsParserType) Type() protoreflect.EnumType { - return &file_configuration_proto_enumTypes[0] + return &file_sourcegraph_zoekt_configuration_v1_configuration_proto_enumTypes[0] } func (x 
CTagsParserType) Number() protoreflect.EnumNumber { @@ -70,7 +70,7 @@ func (x CTagsParserType) Number() protoreflect.EnumNumber { // Deprecated: Use CTagsParserType.Descriptor instead. func (CTagsParserType) EnumDescriptor() ([]byte, []int) { - return file_configuration_proto_rawDescGZIP(), []int{0} + return file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescGZIP(), []int{0} } // SearchConfigurationRequest is the request to the SearchConfiguration RPC. @@ -91,7 +91,7 @@ type SearchConfigurationRequest struct { func (x *SearchConfigurationRequest) Reset() { *x = SearchConfigurationRequest{} if protoimpl.UnsafeEnabled { - mi := &file_configuration_proto_msgTypes[0] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[0] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -104,7 +104,7 @@ func (x *SearchConfigurationRequest) String() string { func (*SearchConfigurationRequest) ProtoMessage() {} func (x *SearchConfigurationRequest) ProtoReflect() protoreflect.Message { - mi := &file_configuration_proto_msgTypes[0] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[0] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -117,7 +117,7 @@ func (x *SearchConfigurationRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use SearchConfigurationRequest.ProtoReflect.Descriptor instead. 
func (*SearchConfigurationRequest) Descriptor() ([]byte, []int) { - return file_configuration_proto_rawDescGZIP(), []int{0} + return file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescGZIP(), []int{0} } func (x *SearchConfigurationRequest) GetFingerprint() *Fingerprint { @@ -152,7 +152,7 @@ type SearchConfigurationResponse struct { func (x *SearchConfigurationResponse) Reset() { *x = SearchConfigurationResponse{} if protoimpl.UnsafeEnabled { - mi := &file_configuration_proto_msgTypes[1] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[1] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -165,7 +165,7 @@ func (x *SearchConfigurationResponse) String() string { func (*SearchConfigurationResponse) ProtoMessage() {} func (x *SearchConfigurationResponse) ProtoReflect() protoreflect.Message { - mi := &file_configuration_proto_msgTypes[1] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[1] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -178,7 +178,7 @@ func (x *SearchConfigurationResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use SearchConfigurationResponse.ProtoReflect.Descriptor instead. 
func (*SearchConfigurationResponse) Descriptor() ([]byte, []int) { - return file_configuration_proto_rawDescGZIP(), []int{1} + return file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescGZIP(), []int{1} } func (x *SearchConfigurationResponse) GetFingerprint() *Fingerprint { @@ -212,7 +212,7 @@ type Fingerprint struct { func (x *Fingerprint) Reset() { *x = Fingerprint{} if protoimpl.UnsafeEnabled { - mi := &file_configuration_proto_msgTypes[2] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[2] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -225,7 +225,7 @@ func (x *Fingerprint) String() string { func (*Fingerprint) ProtoMessage() {} func (x *Fingerprint) ProtoReflect() protoreflect.Message { - mi := &file_configuration_proto_msgTypes[2] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[2] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -238,7 +238,7 @@ func (x *Fingerprint) ProtoReflect() protoreflect.Message { // Deprecated: Use Fingerprint.ProtoReflect.Descriptor instead. 
func (*Fingerprint) Descriptor() ([]byte, []int) { - return file_configuration_proto_rawDescGZIP(), []int{2} + return file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescGZIP(), []int{2} } func (x *Fingerprint) GetGeneratedAt() *timestamppb.Timestamp { @@ -267,7 +267,7 @@ type LanguageMapping struct { func (x *LanguageMapping) Reset() { *x = LanguageMapping{} if protoimpl.UnsafeEnabled { - mi := &file_configuration_proto_msgTypes[3] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[3] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -280,7 +280,7 @@ func (x *LanguageMapping) String() string { func (*LanguageMapping) ProtoMessage() {} func (x *LanguageMapping) ProtoReflect() protoreflect.Message { - mi := &file_configuration_proto_msgTypes[3] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[3] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -293,7 +293,7 @@ func (x *LanguageMapping) ProtoReflect() protoreflect.Message { // Deprecated: Use LanguageMapping.ProtoReflect.Descriptor instead. 
func (*LanguageMapping) Descriptor() ([]byte, []int) { - return file_configuration_proto_rawDescGZIP(), []int{3} + return file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescGZIP(), []int{3} } func (x *LanguageMapping) GetLanguage() string { @@ -358,7 +358,7 @@ type ZoektIndexOptions struct { func (x *ZoektIndexOptions) Reset() { *x = ZoektIndexOptions{} if protoimpl.UnsafeEnabled { - mi := &file_configuration_proto_msgTypes[4] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[4] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -371,7 +371,7 @@ func (x *ZoektIndexOptions) String() string { func (*ZoektIndexOptions) ProtoMessage() {} func (x *ZoektIndexOptions) ProtoReflect() protoreflect.Message { - mi := &file_configuration_proto_msgTypes[4] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[4] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -384,7 +384,7 @@ func (x *ZoektIndexOptions) ProtoReflect() protoreflect.Message { // Deprecated: Use ZoektIndexOptions.ProtoReflect.Descriptor instead. 
func (*ZoektIndexOptions) Descriptor() ([]byte, []int) { - return file_configuration_proto_rawDescGZIP(), []int{4} + return file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescGZIP(), []int{4} } func (x *ZoektIndexOptions) GetName() string { @@ -500,7 +500,7 @@ type ZoektRepositoryBranch struct { func (x *ZoektRepositoryBranch) Reset() { *x = ZoektRepositoryBranch{} if protoimpl.UnsafeEnabled { - mi := &file_configuration_proto_msgTypes[5] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[5] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -513,7 +513,7 @@ func (x *ZoektRepositoryBranch) String() string { func (*ZoektRepositoryBranch) ProtoMessage() {} func (x *ZoektRepositoryBranch) ProtoReflect() protoreflect.Message { - mi := &file_configuration_proto_msgTypes[5] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[5] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -526,7 +526,7 @@ func (x *ZoektRepositoryBranch) ProtoReflect() protoreflect.Message { // Deprecated: Use ZoektRepositoryBranch.ProtoReflect.Descriptor instead. 
func (*ZoektRepositoryBranch) Descriptor() ([]byte, []int) { - return file_configuration_proto_rawDescGZIP(), []int{5} + return file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescGZIP(), []int{5} } func (x *ZoektRepositoryBranch) GetName() string { @@ -558,7 +558,7 @@ type ListRequest struct { func (x *ListRequest) Reset() { *x = ListRequest{} if protoimpl.UnsafeEnabled { - mi := &file_configuration_proto_msgTypes[6] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[6] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -571,7 +571,7 @@ func (x *ListRequest) String() string { func (*ListRequest) ProtoMessage() {} func (x *ListRequest) ProtoReflect() protoreflect.Message { - mi := &file_configuration_proto_msgTypes[6] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[6] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -584,7 +584,7 @@ func (x *ListRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use ListRequest.ProtoReflect.Descriptor instead. 
func (*ListRequest) Descriptor() ([]byte, []int) { - return file_configuration_proto_rawDescGZIP(), []int{6} + return file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescGZIP(), []int{6} } func (x *ListRequest) GetHostname() string { @@ -614,7 +614,7 @@ type ListResponse struct { func (x *ListResponse) Reset() { *x = ListResponse{} if protoimpl.UnsafeEnabled { - mi := &file_configuration_proto_msgTypes[7] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[7] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -627,7 +627,7 @@ func (x *ListResponse) String() string { func (*ListResponse) ProtoMessage() {} func (x *ListResponse) ProtoReflect() protoreflect.Message { - mi := &file_configuration_proto_msgTypes[7] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[7] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -640,7 +640,7 @@ func (x *ListResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use ListResponse.ProtoReflect.Descriptor instead. 
func (*ListResponse) Descriptor() ([]byte, []int) { - return file_configuration_proto_rawDescGZIP(), []int{7} + return file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescGZIP(), []int{7} } func (x *ListResponse) GetRepoIds() []int32 { @@ -662,7 +662,7 @@ type UpdateIndexStatusRequest struct { func (x *UpdateIndexStatusRequest) Reset() { *x = UpdateIndexStatusRequest{} if protoimpl.UnsafeEnabled { - mi := &file_configuration_proto_msgTypes[8] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[8] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -675,7 +675,7 @@ func (x *UpdateIndexStatusRequest) String() string { func (*UpdateIndexStatusRequest) ProtoMessage() {} func (x *UpdateIndexStatusRequest) ProtoReflect() protoreflect.Message { - mi := &file_configuration_proto_msgTypes[8] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[8] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -688,7 +688,7 @@ func (x *UpdateIndexStatusRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use UpdateIndexStatusRequest.ProtoReflect.Descriptor instead. 
func (*UpdateIndexStatusRequest) Descriptor() ([]byte, []int) { - return file_configuration_proto_rawDescGZIP(), []int{8} + return file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescGZIP(), []int{8} } func (x *UpdateIndexStatusRequest) GetRepositories() []*UpdateIndexStatusRequest_Repository { @@ -708,7 +708,7 @@ type UpdateIndexStatusResponse struct { func (x *UpdateIndexStatusResponse) Reset() { *x = UpdateIndexStatusResponse{} if protoimpl.UnsafeEnabled { - mi := &file_configuration_proto_msgTypes[9] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[9] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -721,7 +721,7 @@ func (x *UpdateIndexStatusResponse) String() string { func (*UpdateIndexStatusResponse) ProtoMessage() {} func (x *UpdateIndexStatusResponse) ProtoReflect() protoreflect.Message { - mi := &file_configuration_proto_msgTypes[9] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[9] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -734,7 +734,7 @@ func (x *UpdateIndexStatusResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use UpdateIndexStatusResponse.ProtoReflect.Descriptor instead. 
func (*UpdateIndexStatusResponse) Descriptor() ([]byte, []int) { - return file_configuration_proto_rawDescGZIP(), []int{9} + return file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescGZIP(), []int{9} } type UpdateIndexStatusRequest_Repository struct { @@ -753,7 +753,7 @@ type UpdateIndexStatusRequest_Repository struct { func (x *UpdateIndexStatusRequest_Repository) Reset() { *x = UpdateIndexStatusRequest_Repository{} if protoimpl.UnsafeEnabled { - mi := &file_configuration_proto_msgTypes[10] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[10] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -766,7 +766,7 @@ func (x *UpdateIndexStatusRequest_Repository) String() string { func (*UpdateIndexStatusRequest_Repository) ProtoMessage() {} func (x *UpdateIndexStatusRequest_Repository) ProtoReflect() protoreflect.Message { - mi := &file_configuration_proto_msgTypes[10] + mi := &file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[10] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -779,7 +779,7 @@ func (x *UpdateIndexStatusRequest_Repository) ProtoReflect() protoreflect.Messag // Deprecated: Use UpdateIndexStatusRequest_Repository.ProtoReflect.Descriptor instead. 
func (*UpdateIndexStatusRequest_Repository) Descriptor() ([]byte, []int) { - return file_configuration_proto_rawDescGZIP(), []int{8, 0} + return file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescGZIP(), []int{8, 0} } func (x *UpdateIndexStatusRequest_Repository) GetRepoId() uint32 { @@ -803,180 +803,182 @@ func (x *UpdateIndexStatusRequest_Repository) GetIndexTimeUnix() int64 { return 0 } -var File_configuration_proto protoreflect.FileDescriptor - -var file_configuration_proto_rawDesc = []byte{ - 0x0a, 0x13, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x22, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, - 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, - 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x31, 0x1a, 0x1f, 0x67, 0x6f, 0x6f, 0x67, 0x6c, - 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x74, 0x69, 0x6d, 0x65, 0x73, - 0x74, 0x61, 0x6d, 0x70, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x8a, 0x01, 0x0a, 0x1a, 0x53, +var File_sourcegraph_zoekt_configuration_v1_configuration_proto protoreflect.FileDescriptor + +var file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDesc = []byte{ + 0x0a, 0x36, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2f, 0x7a, 0x6f, + 0x65, 0x6b, 0x74, 0x2f, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, + 0x6e, 0x2f, 0x76, 0x31, 0x2f, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x22, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, + 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, + 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x31, 0x1a, 0x1f, 0x67, 0x6f, + 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x74, 0x69, + 0x6d, 0x65, 0x73, 0x74, 
0x61, 0x6d, 0x70, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x8a, 0x01, + 0x0a, 0x1a, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x51, 0x0a, 0x0b, + 0x66, 0x69, 0x6e, 0x67, 0x65, 0x72, 0x70, 0x72, 0x69, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x2f, 0x2e, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, + 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x31, 0x2e, 0x46, 0x69, 0x6e, 0x67, 0x65, 0x72, 0x70, 0x72, 0x69, + 0x6e, 0x74, 0x52, 0x0b, 0x66, 0x69, 0x6e, 0x67, 0x65, 0x72, 0x70, 0x72, 0x69, 0x6e, 0x74, 0x12, + 0x19, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6f, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, + 0x05, 0x52, 0x07, 0x72, 0x65, 0x70, 0x6f, 0x49, 0x64, 0x73, 0x22, 0xd0, 0x01, 0x0a, 0x1b, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, - 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x51, 0x0a, 0x0b, 0x66, 0x69, 0x6e, - 0x67, 0x65, 0x72, 0x70, 0x72, 0x69, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2f, - 0x2e, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, - 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x2e, 0x76, 0x31, 0x2e, 0x46, 0x69, 0x6e, 0x67, 0x65, 0x72, 0x70, 0x72, 0x69, 0x6e, 0x74, 0x52, - 0x0b, 0x66, 0x69, 0x6e, 0x67, 0x65, 0x72, 0x70, 0x72, 0x69, 0x6e, 0x74, 0x12, 0x19, 0x0a, 0x08, - 0x72, 0x65, 0x70, 0x6f, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, 0x52, 0x07, - 0x72, 0x65, 0x70, 0x6f, 0x49, 0x64, 0x73, 0x22, 0xd0, 0x01, 0x0a, 0x1b, 0x53, 0x65, 0x61, 0x72, - 0x63, 0x68, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x51, 0x0a, 0x0b, 
0x66, 0x69, 0x6e, 0x67, 0x65, - 0x72, 0x70, 0x72, 0x69, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2f, 0x2e, 0x73, - 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, - 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x76, - 0x31, 0x2e, 0x46, 0x69, 0x6e, 0x67, 0x65, 0x72, 0x70, 0x72, 0x69, 0x6e, 0x74, 0x52, 0x0b, 0x66, - 0x69, 0x6e, 0x67, 0x65, 0x72, 0x70, 0x72, 0x69, 0x6e, 0x74, 0x12, 0x5e, 0x0a, 0x0f, 0x75, 0x70, - 0x64, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x35, 0x2e, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, - 0x68, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, - 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x31, 0x2e, 0x5a, 0x6f, 0x65, 0x6b, 0x74, 0x49, 0x6e, - 0x64, 0x65, 0x78, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x0e, 0x75, 0x70, 0x64, 0x61, - 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x6c, 0x0a, 0x0b, 0x46, 0x69, - 0x6e, 0x67, 0x65, 0x72, 0x70, 0x72, 0x69, 0x6e, 0x74, 0x12, 0x3d, 0x0a, 0x0c, 0x67, 0x65, 0x6e, - 0x65, 0x72, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x61, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, - 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, - 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x0b, 0x67, 0x65, 0x6e, - 0x65, 0x72, 0x61, 0x74, 0x65, 0x64, 0x41, 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x69, 0x64, 0x65, 0x6e, - 0x74, 0x69, 0x66, 0x69, 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0a, 0x69, 0x64, - 0x65, 0x6e, 0x74, 0x69, 0x66, 0x69, 0x65, 0x72, 0x22, 0x78, 0x0a, 0x0f, 0x4c, 0x61, 0x6e, 0x67, - 0x75, 0x61, 0x67, 0x65, 0x4d, 0x61, 0x70, 0x70, 0x69, 0x6e, 0x67, 0x12, 0x1a, 0x0a, 0x08, 0x6c, - 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, - 0x61, 
0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x12, 0x49, 0x0a, 0x05, 0x63, 0x74, 0x61, 0x67, 0x73, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x33, 0x2e, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, + 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x51, 0x0a, 0x0b, 0x66, 0x69, + 0x6e, 0x67, 0x65, 0x72, 0x70, 0x72, 0x69, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x2f, 0x2e, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, + 0x65, 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, + 0x6e, 0x2e, 0x76, 0x31, 0x2e, 0x46, 0x69, 0x6e, 0x67, 0x65, 0x72, 0x70, 0x72, 0x69, 0x6e, 0x74, + 0x52, 0x0b, 0x66, 0x69, 0x6e, 0x67, 0x65, 0x72, 0x70, 0x72, 0x69, 0x6e, 0x74, 0x12, 0x5e, 0x0a, + 0x0f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x35, 0x2e, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, - 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x31, 0x2e, 0x43, 0x54, 0x61, 0x67, - 0x73, 0x50, 0x61, 0x72, 0x73, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x52, 0x05, 0x63, 0x74, 0x61, - 0x67, 0x73, 0x22, 0xa4, 0x04, 0x0a, 0x11, 0x5a, 0x6f, 0x65, 0x6b, 0x74, 0x49, 0x6e, 0x64, 0x65, - 0x78, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x17, 0x0a, 0x07, - 0x72, 0x65, 0x70, 0x6f, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x72, - 0x65, 0x70, 0x6f, 0x49, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x70, 0x75, 0x62, 0x6c, 0x69, 0x63, 0x18, - 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x06, 0x70, 0x75, 0x62, 0x6c, 0x69, 0x63, 0x12, 0x12, 0x0a, - 0x04, 0x66, 0x6f, 0x72, 0x6b, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x66, 0x6f, 0x72, - 0x6b, 0x12, 0x1a, 0x0a, 0x08, 0x61, 0x72, 0x63, 
0x68, 0x69, 0x76, 0x65, 0x64, 0x18, 0x05, 0x20, - 0x01, 0x28, 0x08, 0x52, 0x08, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x64, 0x12, 0x1f, 0x0a, - 0x0b, 0x6c, 0x61, 0x72, 0x67, 0x65, 0x5f, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x18, 0x06, 0x20, 0x03, - 0x28, 0x09, 0x52, 0x0a, 0x6c, 0x61, 0x72, 0x67, 0x65, 0x46, 0x69, 0x6c, 0x65, 0x73, 0x12, 0x18, - 0x0a, 0x07, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73, 0x18, 0x07, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x07, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73, 0x12, 0x55, 0x0a, 0x08, 0x62, 0x72, 0x61, 0x6e, - 0x63, 0x68, 0x65, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x39, 0x2e, 0x73, 0x6f, 0x75, - 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x63, - 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x31, 0x2e, + 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x31, 0x2e, 0x5a, 0x6f, 0x65, 0x6b, + 0x74, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x0e, 0x75, + 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x6c, 0x0a, + 0x0b, 0x46, 0x69, 0x6e, 0x67, 0x65, 0x72, 0x70, 0x72, 0x69, 0x6e, 0x74, 0x12, 0x3d, 0x0a, 0x0c, + 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x61, 0x74, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x0b, + 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x64, 0x41, 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x69, + 0x64, 0x65, 0x6e, 0x74, 0x69, 0x66, 0x69, 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, + 0x0a, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x69, 0x66, 0x69, 0x65, 0x72, 0x22, 0x78, 0x0a, 0x0f, 0x4c, + 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x4d, 0x61, 0x70, 0x70, 0x69, 0x6e, 0x67, 0x12, 0x1a, + 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 
0x28, 0x09, + 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x12, 0x49, 0x0a, 0x05, 0x63, 0x74, + 0x61, 0x67, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x33, 0x2e, 0x73, 0x6f, 0x75, 0x72, + 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x63, 0x6f, + 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x31, 0x2e, 0x43, + 0x54, 0x61, 0x67, 0x73, 0x50, 0x61, 0x72, 0x73, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x52, 0x05, + 0x63, 0x74, 0x61, 0x67, 0x73, 0x22, 0xa4, 0x04, 0x0a, 0x11, 0x5a, 0x6f, 0x65, 0x6b, 0x74, 0x49, + 0x6e, 0x64, 0x65, 0x78, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x6e, + 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, + 0x17, 0x0a, 0x07, 0x72, 0x65, 0x70, 0x6f, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x06, 0x72, 0x65, 0x70, 0x6f, 0x49, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x70, 0x75, 0x62, 0x6c, + 0x69, 0x63, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x06, 0x70, 0x75, 0x62, 0x6c, 0x69, 0x63, + 0x12, 0x12, 0x0a, 0x04, 0x66, 0x6f, 0x72, 0x6b, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, + 0x66, 0x6f, 0x72, 0x6b, 0x12, 0x1a, 0x0a, 0x08, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x64, + 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x64, + 0x12, 0x1f, 0x0a, 0x0b, 0x6c, 0x61, 0x72, 0x67, 0x65, 0x5f, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x18, + 0x06, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0a, 0x6c, 0x61, 0x72, 0x67, 0x65, 0x46, 0x69, 0x6c, 0x65, + 0x73, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73, 0x18, 0x07, 0x20, 0x01, + 0x28, 0x08, 0x52, 0x07, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73, 0x12, 0x55, 0x0a, 0x08, 0x62, + 0x72, 0x61, 0x6e, 0x63, 0x68, 0x65, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x39, 0x2e, + 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, + 0x74, 0x2e, 0x63, 0x6f, 
0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, + 0x76, 0x31, 0x2e, 0x5a, 0x6f, 0x65, 0x6b, 0x74, 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, + 0x72, 0x79, 0x42, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x52, 0x08, 0x62, 0x72, 0x61, 0x6e, 0x63, 0x68, + 0x65, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x69, 0x6f, 0x72, 0x69, 0x74, 0x79, 0x18, 0x09, + 0x20, 0x01, 0x28, 0x01, 0x52, 0x08, 0x70, 0x72, 0x69, 0x6f, 0x72, 0x69, 0x74, 0x79, 0x12, 0x34, + 0x0a, 0x16, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, + 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x14, + 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x56, 0x65, 0x72, + 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x0b, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x56, 0x0a, 0x0c, 0x6c, 0x61, + 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x5f, 0x6d, 0x61, 0x70, 0x18, 0x0c, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x33, 0x2e, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, + 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x2e, 0x76, 0x31, 0x2e, 0x4c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x4d, 0x61, + 0x70, 0x70, 0x69, 0x6e, 0x67, 0x52, 0x0b, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x4d, + 0x61, 0x70, 0x12, 0x2b, 0x0a, 0x11, 0x73, 0x68, 0x61, 0x72, 0x64, 0x5f, 0x63, 0x6f, 0x6e, 0x63, + 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x05, 0x52, 0x10, 0x73, + 0x68, 0x61, 0x72, 0x64, 0x43, 0x6f, 0x6e, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x12, + 0x1b, 0x0a, 0x09, 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x0e, 0x20, 0x01, + 0x28, 0x03, 0x52, 0x08, 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x74, 0x49, 0x64, 0x22, 0x45, 0x0a, 0x15, 0x5a, 0x6f, 0x65, 0x6b, 0x74, 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, 
0x74, 0x6f, 0x72, 0x79, 0x42, - 0x72, 0x61, 0x6e, 0x63, 0x68, 0x52, 0x08, 0x62, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x65, 0x73, 0x12, - 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x69, 0x6f, 0x72, 0x69, 0x74, 0x79, 0x18, 0x09, 0x20, 0x01, 0x28, - 0x01, 0x52, 0x08, 0x70, 0x72, 0x69, 0x6f, 0x72, 0x69, 0x74, 0x79, 0x12, 0x34, 0x0a, 0x16, 0x64, - 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x5f, 0x76, 0x65, - 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x14, 0x64, 0x6f, 0x63, - 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, - 0x6e, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x56, 0x0a, 0x0c, 0x6c, 0x61, 0x6e, 0x67, 0x75, - 0x61, 0x67, 0x65, 0x5f, 0x6d, 0x61, 0x70, 0x18, 0x0c, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x33, 0x2e, + 0x72, 0x61, 0x6e, 0x63, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, + 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, + 0x69, 0x6f, 0x6e, 0x22, 0x4a, 0x0a, 0x0b, 0x4c, 0x69, 0x73, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x68, 0x6f, 0x73, 0x74, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x68, 0x6f, 0x73, 0x74, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1f, + 0x0a, 0x0b, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x65, 0x64, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x02, 0x20, + 0x03, 0x28, 0x05, 0x52, 0x0a, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x65, 0x64, 0x49, 0x64, 0x73, 0x22, + 0x29, 0x0a, 0x0c, 0x4c, 0x69, 0x73, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, + 0x19, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x6f, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, + 0x05, 0x52, 0x07, 0x72, 0x65, 0x70, 0x6f, 0x49, 0x64, 0x73, 0x22, 0xae, 0x02, 0x0a, 0x18, 0x55, + 0x70, 
0x64, 0x61, 0x74, 0x65, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x6b, 0x0a, 0x0c, 0x72, 0x65, 0x70, 0x6f, 0x73, + 0x69, 0x74, 0x6f, 0x72, 0x69, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x47, 0x2e, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, - 0x76, 0x31, 0x2e, 0x4c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x4d, 0x61, 0x70, 0x70, 0x69, - 0x6e, 0x67, 0x52, 0x0b, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x4d, 0x61, 0x70, 0x12, - 0x2b, 0x0a, 0x11, 0x73, 0x68, 0x61, 0x72, 0x64, 0x5f, 0x63, 0x6f, 0x6e, 0x63, 0x75, 0x72, 0x72, - 0x65, 0x6e, 0x63, 0x79, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x05, 0x52, 0x10, 0x73, 0x68, 0x61, 0x72, - 0x64, 0x43, 0x6f, 0x6e, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x12, 0x1b, 0x0a, 0x09, - 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x03, 0x52, - 0x08, 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x74, 0x49, 0x64, 0x22, 0x45, 0x0a, 0x15, 0x5a, 0x6f, 0x65, - 0x6b, 0x74, 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x42, 0x72, 0x61, 0x6e, - 0x63, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, - 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, - 0x22, 0x4a, 0x0a, 0x0b, 0x4c, 0x69, 0x73, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, - 0x1a, 0x0a, 0x08, 0x68, 0x6f, 0x73, 0x74, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x08, 0x68, 0x6f, 0x73, 0x74, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1f, 0x0a, 0x0b, 0x69, - 0x6e, 0x64, 0x65, 0x78, 0x65, 0x64, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, - 0x52, 0x0a, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x65, 
0x64, 0x49, 0x64, 0x73, 0x22, 0x29, 0x0a, 0x0c, - 0x4c, 0x69, 0x73, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x19, 0x0a, 0x08, - 0x72, 0x65, 0x70, 0x6f, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x05, 0x52, 0x07, - 0x72, 0x65, 0x70, 0x6f, 0x49, 0x64, 0x73, 0x22, 0xae, 0x02, 0x0a, 0x18, 0x55, 0x70, 0x64, 0x61, - 0x74, 0x65, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x12, 0x6b, 0x0a, 0x0c, 0x72, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, - 0x72, 0x69, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x47, 0x2e, 0x73, 0x6f, 0x75, - 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x63, - 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x31, 0x2e, - 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x53, 0x74, 0x61, 0x74, 0x75, - 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, - 0x6f, 0x72, 0x79, 0x52, 0x0c, 0x72, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x69, 0x65, - 0x73, 0x1a, 0xa4, 0x01, 0x0a, 0x0a, 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, - 0x12, 0x17, 0x0a, 0x07, 0x72, 0x65, 0x70, 0x6f, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x0d, 0x52, 0x06, 0x72, 0x65, 0x70, 0x6f, 0x49, 0x64, 0x12, 0x55, 0x0a, 0x08, 0x62, 0x72, 0x61, - 0x6e, 0x63, 0x68, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x39, 0x2e, 0x73, 0x6f, - 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, - 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x31, - 0x2e, 0x5a, 0x6f, 0x65, 0x6b, 0x74, 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, - 0x42, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x52, 0x08, 0x62, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x65, 0x73, - 0x12, 0x26, 0x0a, 0x0f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5f, 0x74, 0x69, 0x6d, 0x65, 
0x5f, 0x75, - 0x6e, 0x69, 0x78, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0d, 0x69, 0x6e, 0x64, 0x65, 0x78, - 0x54, 0x69, 0x6d, 0x65, 0x55, 0x6e, 0x69, 0x78, 0x22, 0x1b, 0x0a, 0x19, 0x55, 0x70, 0x64, 0x61, - 0x74, 0x65, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2a, 0x91, 0x01, 0x0a, 0x0f, 0x43, 0x54, 0x61, 0x67, 0x73, 0x50, - 0x61, 0x72, 0x73, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x22, 0x0a, 0x1e, 0x43, 0x5f, 0x54, - 0x41, 0x47, 0x53, 0x5f, 0x50, 0x41, 0x52, 0x53, 0x45, 0x52, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, - 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x1b, 0x0a, - 0x17, 0x43, 0x5f, 0x54, 0x41, 0x47, 0x53, 0x5f, 0x50, 0x41, 0x52, 0x53, 0x45, 0x52, 0x5f, 0x54, - 0x59, 0x50, 0x45, 0x5f, 0x4e, 0x4f, 0x4e, 0x45, 0x10, 0x01, 0x12, 0x20, 0x0a, 0x1c, 0x43, 0x5f, - 0x54, 0x41, 0x47, 0x53, 0x5f, 0x50, 0x41, 0x52, 0x53, 0x45, 0x52, 0x5f, 0x54, 0x59, 0x50, 0x45, - 0x5f, 0x55, 0x4e, 0x49, 0x56, 0x45, 0x52, 0x53, 0x41, 0x4c, 0x10, 0x02, 0x12, 0x1b, 0x0a, 0x17, + 0x76, 0x31, 0x2e, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x53, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x52, 0x65, 0x70, 0x6f, + 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x52, 0x0c, 0x72, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, + 0x72, 0x69, 0x65, 0x73, 0x1a, 0xa4, 0x01, 0x0a, 0x0a, 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, + 0x6f, 0x72, 0x79, 0x12, 0x17, 0x0a, 0x07, 0x72, 0x65, 0x70, 0x6f, 0x5f, 0x69, 0x64, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x72, 0x65, 0x70, 0x6f, 0x49, 0x64, 0x12, 0x55, 0x0a, 0x08, + 0x62, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x39, + 0x2e, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, + 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x2e, 0x76, 0x31, 0x2e, 
0x5a, 0x6f, 0x65, 0x6b, 0x74, 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, + 0x6f, 0x72, 0x79, 0x42, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x52, 0x08, 0x62, 0x72, 0x61, 0x6e, 0x63, + 0x68, 0x65, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5f, 0x74, 0x69, 0x6d, + 0x65, 0x5f, 0x75, 0x6e, 0x69, 0x78, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0d, 0x69, 0x6e, + 0x64, 0x65, 0x78, 0x54, 0x69, 0x6d, 0x65, 0x55, 0x6e, 0x69, 0x78, 0x22, 0x1b, 0x0a, 0x19, 0x55, + 0x70, 0x64, 0x61, 0x74, 0x65, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2a, 0x91, 0x01, 0x0a, 0x0f, 0x43, 0x54, 0x61, + 0x67, 0x73, 0x50, 0x61, 0x72, 0x73, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x22, 0x0a, 0x1e, 0x43, 0x5f, 0x54, 0x41, 0x47, 0x53, 0x5f, 0x50, 0x41, 0x52, 0x53, 0x45, 0x52, 0x5f, 0x54, 0x59, - 0x50, 0x45, 0x5f, 0x53, 0x43, 0x49, 0x50, 0x10, 0x03, 0x32, 0xb8, 0x03, 0x0a, 0x19, 0x5a, 0x6f, - 0x65, 0x6b, 0x74, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x98, 0x01, 0x0a, 0x13, 0x53, 0x65, 0x61, 0x72, - 0x63, 0x68, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, - 0x3e, 0x2e, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, - 0x65, 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, - 0x6e, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x43, 0x6f, 0x6e, 0x66, 0x69, - 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, - 0x3f, 0x2e, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, - 0x65, 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, - 0x6e, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x43, 0x6f, 0x6e, 0x66, 0x69, - 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 
0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x22, 0x00, 0x12, 0x6b, 0x0a, 0x04, 0x4c, 0x69, 0x73, 0x74, 0x12, 0x2f, 0x2e, 0x73, 0x6f, 0x75, - 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x63, - 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x31, 0x2e, - 0x4c, 0x69, 0x73, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x30, 0x2e, 0x73, 0x6f, - 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, - 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x31, - 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, - 0x92, 0x01, 0x0a, 0x11, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x53, - 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x2e, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, - 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, - 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x31, 0x2e, 0x55, 0x70, 0x64, 0x61, 0x74, - 0x65, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x1a, 0x3d, 0x2e, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, - 0x68, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, - 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x31, 0x2e, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x49, - 0x6e, 0x64, 0x65, 0x78, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, - 0x73, 0x65, 0x22, 0x00, 0x42, 0x6a, 0x5a, 0x68, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, - 0x6f, 0x6d, 0x2f, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2f, 0x7a, - 0x6f, 0x65, 0x6b, 0x74, 0x2f, 0x63, 0x6d, 0x64, 0x2f, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2d, 0x73, - 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2d, 0x69, 0x6e, 0x64, 0x65, 0x78, - 0x73, 
0x65, 0x72, 0x76, 0x65, 0x72, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x73, 0x6f, - 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2f, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2f, - 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2f, 0x76, 0x31, - 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x50, 0x45, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, + 0x12, 0x1b, 0x0a, 0x17, 0x43, 0x5f, 0x54, 0x41, 0x47, 0x53, 0x5f, 0x50, 0x41, 0x52, 0x53, 0x45, + 0x52, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x4e, 0x4f, 0x4e, 0x45, 0x10, 0x01, 0x12, 0x20, 0x0a, + 0x1c, 0x43, 0x5f, 0x54, 0x41, 0x47, 0x53, 0x5f, 0x50, 0x41, 0x52, 0x53, 0x45, 0x52, 0x5f, 0x54, + 0x59, 0x50, 0x45, 0x5f, 0x55, 0x4e, 0x49, 0x56, 0x45, 0x52, 0x53, 0x41, 0x4c, 0x10, 0x02, 0x12, + 0x1b, 0x0a, 0x17, 0x43, 0x5f, 0x54, 0x41, 0x47, 0x53, 0x5f, 0x50, 0x41, 0x52, 0x53, 0x45, 0x52, + 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, 0x43, 0x49, 0x50, 0x10, 0x03, 0x32, 0xb8, 0x03, 0x0a, + 0x19, 0x5a, 0x6f, 0x65, 0x6b, 0x74, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x98, 0x01, 0x0a, 0x13, 0x53, + 0x65, 0x61, 0x72, 0x63, 0x68, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x12, 0x3e, 0x2e, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, + 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, + 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x43, 0x6f, + 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x1a, 0x3f, 0x2e, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, + 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, + 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x43, 
0x6f, + 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x6b, 0x0a, 0x04, 0x4c, 0x69, 0x73, 0x74, 0x12, 0x2f, 0x2e, + 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, + 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, + 0x76, 0x31, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x30, + 0x2e, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, + 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x2e, 0x76, 0x31, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x22, 0x00, 0x12, 0x92, 0x01, 0x0a, 0x11, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x49, 0x6e, 0x64, + 0x65, 0x78, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3c, 0x2e, 0x73, 0x6f, 0x75, 0x72, 0x63, + 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, + 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x31, 0x2e, 0x55, 0x70, + 0x64, 0x61, 0x74, 0x65, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x3d, 0x2e, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, + 0x72, 0x61, 0x70, 0x68, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, + 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x76, 0x31, 0x2e, 0x55, 0x70, 0x64, 0x61, + 0x74, 0x65, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, + 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x6f, 0x5a, 0x6d, 0x67, 0x69, 0x74, 0x68, 0x75, + 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, + 0x68, 0x2f, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2f, 0x63, 0x6d, 0x64, 0x2f, 0x7a, 0x6f, 0x65, 0x6b, + 0x74, 0x2d, 0x73, 0x6f, 0x75, 
0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2d, 0x69, 0x6e, + 0x64, 0x65, 0x78, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, + 0x68, 0x2f, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2f, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2f, 0x76, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( - file_configuration_proto_rawDescOnce sync.Once - file_configuration_proto_rawDescData = file_configuration_proto_rawDesc + file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescOnce sync.Once + file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescData = file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDesc ) -func file_configuration_proto_rawDescGZIP() []byte { - file_configuration_proto_rawDescOnce.Do(func() { - file_configuration_proto_rawDescData = protoimpl.X.CompressGZIP(file_configuration_proto_rawDescData) +func file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescGZIP() []byte { + file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescOnce.Do(func() { + file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescData = protoimpl.X.CompressGZIP(file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescData) }) - return file_configuration_proto_rawDescData + return file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDescData } -var file_configuration_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_configuration_proto_msgTypes = make([]protoimpl.MessageInfo, 11) -var file_configuration_proto_goTypes = []interface{}{ +var file_sourcegraph_zoekt_configuration_v1_configuration_proto_enumTypes = make([]protoimpl.EnumInfo, 1) +var file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes = make([]protoimpl.MessageInfo, 11) +var 
file_sourcegraph_zoekt_configuration_v1_configuration_proto_goTypes = []interface{}{ (CTagsParserType)(0), // 0: sourcegraph.zoekt.configuration.v1.CTagsParserType (*SearchConfigurationRequest)(nil), // 1: sourcegraph.zoekt.configuration.v1.SearchConfigurationRequest (*SearchConfigurationResponse)(nil), // 2: sourcegraph.zoekt.configuration.v1.SearchConfigurationResponse @@ -991,7 +993,7 @@ var file_configuration_proto_goTypes = []interface{}{ (*UpdateIndexStatusRequest_Repository)(nil), // 11: sourcegraph.zoekt.configuration.v1.UpdateIndexStatusRequest.Repository (*timestamppb.Timestamp)(nil), // 12: google.protobuf.Timestamp } -var file_configuration_proto_depIdxs = []int32{ +var file_sourcegraph_zoekt_configuration_v1_configuration_proto_depIdxs = []int32{ 3, // 0: sourcegraph.zoekt.configuration.v1.SearchConfigurationRequest.fingerprint:type_name -> sourcegraph.zoekt.configuration.v1.Fingerprint 3, // 1: sourcegraph.zoekt.configuration.v1.SearchConfigurationResponse.fingerprint:type_name -> sourcegraph.zoekt.configuration.v1.Fingerprint 5, // 2: sourcegraph.zoekt.configuration.v1.SearchConfigurationResponse.updated_options:type_name -> sourcegraph.zoekt.configuration.v1.ZoektIndexOptions @@ -1014,13 +1016,13 @@ var file_configuration_proto_depIdxs = []int32{ 0, // [0:9] is the sub-list for field type_name } -func init() { file_configuration_proto_init() } -func file_configuration_proto_init() { - if File_configuration_proto != nil { +func init() { file_sourcegraph_zoekt_configuration_v1_configuration_proto_init() } +func file_sourcegraph_zoekt_configuration_v1_configuration_proto_init() { + if File_sourcegraph_zoekt_configuration_v1_configuration_proto != nil { return } if !protoimpl.UnsafeEnabled { - file_configuration_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*SearchConfigurationRequest); i { 
case 0: return &v.state @@ -1032,7 +1034,7 @@ func file_configuration_proto_init() { return nil } } - file_configuration_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*SearchConfigurationResponse); i { case 0: return &v.state @@ -1044,7 +1046,7 @@ func file_configuration_proto_init() { return nil } } - file_configuration_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*Fingerprint); i { case 0: return &v.state @@ -1056,7 +1058,7 @@ func file_configuration_proto_init() { return nil } } - file_configuration_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*LanguageMapping); i { case 0: return &v.state @@ -1068,7 +1070,7 @@ func file_configuration_proto_init() { return nil } } - file_configuration_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { + file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*ZoektIndexOptions); i { case 0: return &v.state @@ -1080,7 +1082,7 @@ func file_configuration_proto_init() { return nil } } - file_configuration_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} { + file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*ZoektRepositoryBranch); i { case 0: return &v.state @@ -1092,7 +1094,7 @@ func file_configuration_proto_init() { return nil } } - file_configuration_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} { + 
file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*ListRequest); i { case 0: return &v.state @@ -1104,7 +1106,7 @@ func file_configuration_proto_init() { return nil } } - file_configuration_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { + file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*ListResponse); i { case 0: return &v.state @@ -1116,7 +1118,7 @@ func file_configuration_proto_init() { return nil } } - file_configuration_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { + file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*UpdateIndexStatusRequest); i { case 0: return &v.state @@ -1128,7 +1130,7 @@ func file_configuration_proto_init() { return nil } } - file_configuration_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} { + file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*UpdateIndexStatusResponse); i { case 0: return &v.state @@ -1140,7 +1142,7 @@ func file_configuration_proto_init() { return nil } } - file_configuration_proto_msgTypes[10].Exporter = func(v interface{}, i int) interface{} { + file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes[10].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*UpdateIndexStatusRequest_Repository); i { case 0: return &v.state @@ -1157,19 +1159,19 @@ func file_configuration_proto_init() { out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), - RawDescriptor: file_configuration_proto_rawDesc, + RawDescriptor: file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDesc, NumEnums: 1, NumMessages: 11, NumExtensions: 0, 
NumServices: 1, }, - GoTypes: file_configuration_proto_goTypes, - DependencyIndexes: file_configuration_proto_depIdxs, - EnumInfos: file_configuration_proto_enumTypes, - MessageInfos: file_configuration_proto_msgTypes, + GoTypes: file_sourcegraph_zoekt_configuration_v1_configuration_proto_goTypes, + DependencyIndexes: file_sourcegraph_zoekt_configuration_v1_configuration_proto_depIdxs, + EnumInfos: file_sourcegraph_zoekt_configuration_v1_configuration_proto_enumTypes, + MessageInfos: file_sourcegraph_zoekt_configuration_v1_configuration_proto_msgTypes, }.Build() - File_configuration_proto = out.File - file_configuration_proto_rawDesc = nil - file_configuration_proto_goTypes = nil - file_configuration_proto_depIdxs = nil + File_sourcegraph_zoekt_configuration_v1_configuration_proto = out.File + file_sourcegraph_zoekt_configuration_v1_configuration_proto_rawDesc = nil + file_sourcegraph_zoekt_configuration_v1_configuration_proto_goTypes = nil + file_sourcegraph_zoekt_configuration_v1_configuration_proto_depIdxs = nil } diff --git a/cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1/configuration.proto b/cmd/zoekt-sourcegraph-indexserver/grpc/protos/sourcegraph/zoekt/configuration/v1/configuration.proto similarity index 98% rename from cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1/configuration.proto rename to cmd/zoekt-sourcegraph-indexserver/grpc/protos/sourcegraph/zoekt/configuration/v1/configuration.proto index 889cf60ec..ecd611d70 100644 --- a/cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1/configuration.proto +++ b/cmd/zoekt-sourcegraph-indexserver/grpc/protos/sourcegraph/zoekt/configuration/v1/configuration.proto @@ -4,7 +4,7 @@ package sourcegraph.zoekt.configuration.v1; import "google/protobuf/timestamp.proto"; -option go_package = "github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1"; +option go_package = 
"github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/grpc/protos/sourcegraph/zoekt/configuration/v1"; service ZoektConfigurationService { // SearchConfiguration returns the current indexing configuration for the specified repositories. diff --git a/cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1/configuration_grpc.pb.go b/cmd/zoekt-sourcegraph-indexserver/grpc/protos/sourcegraph/zoekt/configuration/v1/configuration_grpc.pb.go similarity index 98% rename from cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1/configuration_grpc.pb.go rename to cmd/zoekt-sourcegraph-indexserver/grpc/protos/sourcegraph/zoekt/configuration/v1/configuration_grpc.pb.go index e45d3d765..e6650b490 100644 --- a/cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1/configuration_grpc.pb.go +++ b/cmd/zoekt-sourcegraph-indexserver/grpc/protos/sourcegraph/zoekt/configuration/v1/configuration_grpc.pb.go @@ -2,7 +2,7 @@ // versions: // - protoc-gen-go-grpc v1.3.0 // - protoc (unknown) -// source: configuration.proto +// source: sourcegraph/zoekt/configuration/v1/configuration.proto package v1 @@ -188,5 +188,5 @@ var ZoektConfigurationService_ServiceDesc = grpc.ServiceDesc{ }, }, Streams: []grpc.StreamDesc{}, - Metadata: "configuration.proto", + Metadata: "sourcegraph/zoekt/configuration/v1/configuration.proto", } diff --git a/cmd/zoekt-sourcegraph-indexserver/grpc/protos/zoekt/indexserver/v1/indexserver.pb.go b/cmd/zoekt-sourcegraph-indexserver/grpc/protos/zoekt/indexserver/v1/indexserver.pb.go new file mode 100644 index 000000000..a97a4310b --- /dev/null +++ b/cmd/zoekt-sourcegraph-indexserver/grpc/protos/zoekt/indexserver/v1/indexserver.pb.go @@ -0,0 +1,203 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. 
+// versions: +// protoc-gen-go v1.28.1 +// protoc (unknown) +// source: zoekt/indexserver/v1/indexserver.proto + +package v1 + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type DeleteAllDataRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields +} + +func (x *DeleteAllDataRequest) Reset() { + *x = DeleteAllDataRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_zoekt_indexserver_v1_indexserver_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *DeleteAllDataRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DeleteAllDataRequest) ProtoMessage() {} + +func (x *DeleteAllDataRequest) ProtoReflect() protoreflect.Message { + mi := &file_zoekt_indexserver_v1_indexserver_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DeleteAllDataRequest.ProtoReflect.Descriptor instead. 
+func (*DeleteAllDataRequest) Descriptor() ([]byte, []int) { + return file_zoekt_indexserver_v1_indexserver_proto_rawDescGZIP(), []int{0} +} + +type DeleteAllDataResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields +} + +func (x *DeleteAllDataResponse) Reset() { + *x = DeleteAllDataResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_zoekt_indexserver_v1_indexserver_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *DeleteAllDataResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DeleteAllDataResponse) ProtoMessage() {} + +func (x *DeleteAllDataResponse) ProtoReflect() protoreflect.Message { + mi := &file_zoekt_indexserver_v1_indexserver_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DeleteAllDataResponse.ProtoReflect.Descriptor instead. 
+func (*DeleteAllDataResponse) Descriptor() ([]byte, []int) { + return file_zoekt_indexserver_v1_indexserver_proto_rawDescGZIP(), []int{1} +} + +var File_zoekt_indexserver_v1_indexserver_proto protoreflect.FileDescriptor + +var file_zoekt_indexserver_v1_indexserver_proto_rawDesc = []byte{ + 0x0a, 0x26, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x73, 0x65, 0x72, + 0x76, 0x65, 0x72, 0x2f, 0x76, 0x31, 0x2f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x73, 0x65, 0x72, 0x76, + 0x65, 0x72, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x14, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, + 0x69, 0x6e, 0x64, 0x65, 0x78, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x22, 0x16, + 0x0a, 0x14, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x41, 0x6c, 0x6c, 0x44, 0x61, 0x74, 0x61, 0x52, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x22, 0x17, 0x0a, 0x15, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, + 0x41, 0x6c, 0x6c, 0x44, 0x61, 0x74, 0x61, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x32, + 0x8e, 0x01, 0x0a, 0x1d, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x49, + 0x6e, 0x64, 0x65, 0x78, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, + 0x65, 0x12, 0x6d, 0x0a, 0x0d, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x41, 0x6c, 0x6c, 0x44, 0x61, + 0x74, 0x61, 0x12, 0x2a, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x69, 0x6e, 0x64, 0x65, 0x78, + 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, + 0x41, 0x6c, 0x6c, 0x44, 0x61, 0x74, 0x61, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2b, + 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x73, 0x65, 0x72, 0x76, + 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x41, 0x6c, 0x6c, 0x44, + 0x61, 0x74, 0x61, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x03, 0x90, 0x02, 0x02, + 0x42, 0x5c, 0x5a, 0x5a, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x73, + 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 
0x61, 0x70, 0x68, 0x2f, 0x7a, 0x6f, 0x65, 0x6b, 0x74, + 0x2f, 0x63, 0x6d, 0x64, 0x2f, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2d, 0x73, 0x6f, 0x75, 0x72, 0x63, + 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2d, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x73, 0x65, 0x72, 0x76, + 0x65, 0x72, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2f, + 0x69, 0x6e, 0x64, 0x65, 0x78, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2f, 0x76, 0x31, 0x62, 0x06, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_zoekt_indexserver_v1_indexserver_proto_rawDescOnce sync.Once + file_zoekt_indexserver_v1_indexserver_proto_rawDescData = file_zoekt_indexserver_v1_indexserver_proto_rawDesc +) + +func file_zoekt_indexserver_v1_indexserver_proto_rawDescGZIP() []byte { + file_zoekt_indexserver_v1_indexserver_proto_rawDescOnce.Do(func() { + file_zoekt_indexserver_v1_indexserver_proto_rawDescData = protoimpl.X.CompressGZIP(file_zoekt_indexserver_v1_indexserver_proto_rawDescData) + }) + return file_zoekt_indexserver_v1_indexserver_proto_rawDescData +} + +var file_zoekt_indexserver_v1_indexserver_proto_msgTypes = make([]protoimpl.MessageInfo, 2) +var file_zoekt_indexserver_v1_indexserver_proto_goTypes = []interface{}{ + (*DeleteAllDataRequest)(nil), // 0: zoekt.indexserver.v1.DeleteAllDataRequest + (*DeleteAllDataResponse)(nil), // 1: zoekt.indexserver.v1.DeleteAllDataResponse +} +var file_zoekt_indexserver_v1_indexserver_proto_depIdxs = []int32{ + 0, // 0: zoekt.indexserver.v1.SourcegraphIndexserverService.DeleteAllData:input_type -> zoekt.indexserver.v1.DeleteAllDataRequest + 1, // 1: zoekt.indexserver.v1.SourcegraphIndexserverService.DeleteAllData:output_type -> zoekt.indexserver.v1.DeleteAllDataResponse + 1, // [1:2] is the sub-list for method output_type + 0, // [0:1] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { 
file_zoekt_indexserver_v1_indexserver_proto_init() } +func file_zoekt_indexserver_v1_indexserver_proto_init() { + if File_zoekt_indexserver_v1_indexserver_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_zoekt_indexserver_v1_indexserver_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*DeleteAllDataRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_zoekt_indexserver_v1_indexserver_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*DeleteAllDataResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_zoekt_indexserver_v1_indexserver_proto_rawDesc, + NumEnums: 0, + NumMessages: 2, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_zoekt_indexserver_v1_indexserver_proto_goTypes, + DependencyIndexes: file_zoekt_indexserver_v1_indexserver_proto_depIdxs, + MessageInfos: file_zoekt_indexserver_v1_indexserver_proto_msgTypes, + }.Build() + File_zoekt_indexserver_v1_indexserver_proto = out.File + file_zoekt_indexserver_v1_indexserver_proto_rawDesc = nil + file_zoekt_indexserver_v1_indexserver_proto_goTypes = nil + file_zoekt_indexserver_v1_indexserver_proto_depIdxs = nil +} diff --git a/cmd/zoekt-sourcegraph-indexserver/grpc/protos/zoekt/indexserver/v1/indexserver.proto b/cmd/zoekt-sourcegraph-indexserver/grpc/protos/zoekt/indexserver/v1/indexserver.proto new file mode 100644 index 000000000..c28aa5d18 --- /dev/null +++ b/cmd/zoekt-sourcegraph-indexserver/grpc/protos/zoekt/indexserver/v1/indexserver.proto @@ -0,0 +1,17 @@ +syntax = "proto3"; + +package zoekt.indexserver.v1; + +option go_package = 
"github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/protos/zoekt/indexserver/v1"; + +message DeleteAllDataRequest {} + +message DeleteAllDataResponse {} + +service SourcegraphIndexserverService { + // DeleteAllData deletes all data for the tenant in the request context. + // This is used for pruning all data after a tenant has been deleted. + rpc DeleteAllData(DeleteAllDataRequest) returns (DeleteAllDataResponse) { + option idempotency_level = IDEMPOTENT; + } +} diff --git a/cmd/zoekt-sourcegraph-indexserver/grpc/protos/zoekt/indexserver/v1/indexserver_grpc.pb.go b/cmd/zoekt-sourcegraph-indexserver/grpc/protos/zoekt/indexserver/v1/indexserver_grpc.pb.go new file mode 100644 index 000000000..abbfbcc9a --- /dev/null +++ b/cmd/zoekt-sourcegraph-indexserver/grpc/protos/zoekt/indexserver/v1/indexserver_grpc.pb.go @@ -0,0 +1,114 @@ +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. +// versions: +// - protoc-gen-go-grpc v1.3.0 +// - protoc (unknown) +// source: zoekt/indexserver/v1/indexserver.proto + +package v1 + +import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.32.0 or later. +const _ = grpc.SupportPackageIsVersion7 + +const ( + SourcegraphIndexserverService_DeleteAllData_FullMethodName = "/zoekt.indexserver.v1.SourcegraphIndexserverService/DeleteAllData" +) + +// SourcegraphIndexserverServiceClient is the client API for SourcegraphIndexserverService service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type SourcegraphIndexserverServiceClient interface { + // DeleteAllData deletes all data for the tenant in the request context. 
+ // This is used for pruning all data after a tenant has been deleted. + DeleteAllData(ctx context.Context, in *DeleteAllDataRequest, opts ...grpc.CallOption) (*DeleteAllDataResponse, error) +} + +type sourcegraphIndexserverServiceClient struct { + cc grpc.ClientConnInterface +} + +func NewSourcegraphIndexserverServiceClient(cc grpc.ClientConnInterface) SourcegraphIndexserverServiceClient { + return &sourcegraphIndexserverServiceClient{cc} +} + +func (c *sourcegraphIndexserverServiceClient) DeleteAllData(ctx context.Context, in *DeleteAllDataRequest, opts ...grpc.CallOption) (*DeleteAllDataResponse, error) { + out := new(DeleteAllDataResponse) + err := c.cc.Invoke(ctx, SourcegraphIndexserverService_DeleteAllData_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +// SourcegraphIndexserverServiceServer is the server API for SourcegraphIndexserverService service. +// All implementations must embed UnimplementedSourcegraphIndexserverServiceServer +// for forward compatibility +type SourcegraphIndexserverServiceServer interface { + // DeleteAllData deletes all data for the tenant in the request context. + // This is used for pruning all data after a tenant has been deleted. + DeleteAllData(context.Context, *DeleteAllDataRequest) (*DeleteAllDataResponse, error) + mustEmbedUnimplementedSourcegraphIndexserverServiceServer() +} + +// UnimplementedSourcegraphIndexserverServiceServer must be embedded to have forward compatible implementations. 
+type UnimplementedSourcegraphIndexserverServiceServer struct { +} + +func (UnimplementedSourcegraphIndexserverServiceServer) DeleteAllData(context.Context, *DeleteAllDataRequest) (*DeleteAllDataResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method DeleteAllData not implemented") +} +func (UnimplementedSourcegraphIndexserverServiceServer) mustEmbedUnimplementedSourcegraphIndexserverServiceServer() { +} + +// UnsafeSourcegraphIndexserverServiceServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to SourcegraphIndexserverServiceServer will +// result in compilation errors. +type UnsafeSourcegraphIndexserverServiceServer interface { + mustEmbedUnimplementedSourcegraphIndexserverServiceServer() +} + +func RegisterSourcegraphIndexserverServiceServer(s grpc.ServiceRegistrar, srv SourcegraphIndexserverServiceServer) { + s.RegisterService(&SourcegraphIndexserverService_ServiceDesc, srv) +} + +func _SourcegraphIndexserverService_DeleteAllData_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(DeleteAllDataRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(SourcegraphIndexserverServiceServer).DeleteAllData(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: SourcegraphIndexserverService_DeleteAllData_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(SourcegraphIndexserverServiceServer).DeleteAllData(ctx, req.(*DeleteAllDataRequest)) + } + return interceptor(ctx, in, info, handler) +} + +// SourcegraphIndexserverService_ServiceDesc is the grpc.ServiceDesc for SourcegraphIndexserverService service. 
+// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var SourcegraphIndexserverService_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "zoekt.indexserver.v1.SourcegraphIndexserverService", + HandlerType: (*SourcegraphIndexserverServiceServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "DeleteAllData", + Handler: _SourcegraphIndexserverService_DeleteAllData_Handler, + }, + }, + Streams: []grpc.StreamDesc{}, + Metadata: "zoekt/indexserver/v1/indexserver.proto", +} diff --git a/cmd/zoekt-sourcegraph-indexserver/index.go b/cmd/zoekt-sourcegraph-indexserver/index.go index fdd3ead85..36f55331b 100644 --- a/cmd/zoekt-sourcegraph-indexserver/index.go +++ b/cmd/zoekt-sourcegraph-indexserver/index.go @@ -19,9 +19,9 @@ import ( sglog "github.com/sourcegraph/log" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" - "github.com/sourcegraph/zoekt/ctags" - "github.com/sourcegraph/zoekt/internal/tenant" + configv1 "github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/grpc/protos/sourcegraph/zoekt/configuration/v1" + "github.com/sourcegraph/zoekt/index" + "github.com/sourcegraph/zoekt/internal/ctags" ) const defaultIndexingTimeout = 1*time.Hour + 30*time.Minute @@ -85,9 +85,6 @@ type indexArgs struct { // Parallelism is the number of shards to compute in parallel. Parallelism int - // FileLimit is the maximum size of a file - FileLimit int - // UseDelta is true if we want to use the new delta indexer. This should // only be true for repositories we explicitly enable. UseDelta bool @@ -100,26 +97,21 @@ type indexArgs struct { ShardMerging bool } -// BuildOptions returns a build.Options represented by indexArgs. Note: it +// BuildOptions returns a index.Options represented by indexArgs. Note: it // doesn't set fields like repository/branch. 
-func (o *indexArgs) BuildOptions() *build.Options { - shardPrefix := "" - if tenant.EnforceTenant() { - shardPrefix = tenant.SrcPrefix(o.TenantID, o.RepoID) - } - - return &build.Options{ +func (o *indexArgs) BuildOptions() *index.Options { + return &index.Options{ // It is important that this RepositoryDescription exactly matches what // the indexer we call will produce. This is to ensure that // IncrementalSkipIndexing and IndexState can correctly calculate if // nothing needs to be done. RepositoryDescription: zoekt.Repository{ TenantID: o.TenantID, - ID: uint32(o.IndexOptions.RepoID), + ID: o.RepoID, Name: o.Name, Branches: o.Branches, RawConfig: map[string]string{ - "repoid": strconv.Itoa(int(o.IndexOptions.RepoID)), + "repoid": strconv.Itoa(int(o.RepoID)), "priority": strconv.FormatFloat(o.Priority, 'g', -1, 64), "public": marshalBool(o.Public), "fork": marshalBool(o.Fork), @@ -131,7 +123,7 @@ func (o *indexArgs) BuildOptions() *build.Options { }, IndexDir: o.IndexDir, Parallelism: o.Parallelism, - SizeMax: o.FileLimit, + SizeMax: MaxFileSize, LargeFiles: o.LargeFiles, CTagsMustSucceed: o.Symbols, DisableCTags: !o.Symbols, @@ -140,8 +132,6 @@ func (o *indexArgs) BuildOptions() *build.Options { LanguageMap: o.LanguageMap, ShardMerging: o.ShardMerging, - - ShardPrefix: shardPrefix, } } @@ -181,7 +171,7 @@ type gitIndexConfig struct { timeout time.Duration } -func gitIndex(c gitIndexConfig, o *indexArgs, sourcegraph Sourcegraph, l sglog.Logger) error { +func gitIndex(ctx context.Context, c gitIndexConfig, o *indexArgs, sourcegraph Sourcegraph, l sglog.Logger) error { logger := l.Scoped("gitIndex") if len(o.Branches) == 0 { @@ -196,7 +186,7 @@ func gitIndex(c gitIndexConfig, o *indexArgs, sourcegraph Sourcegraph, l sglog.L return errors.New("findRepositoryMetadata in provided configuration was nil - a function must be provided") } - ctx, cancel := context.WithTimeout(context.Background(), c.timeout) + ctx, cancel := context.WithTimeout(ctx, c.timeout) defer 
cancel() gitDir, err := tmpGitDir(o.Name) @@ -239,6 +229,17 @@ func fetchRepo(ctx context.Context, gitDir string, o *indexArgs, c gitIndexConfi return err } + for _, header := range []string{ + "X-Sourcegraph-Actor-UID: internal", + "X-Sourcegraph-Tenant-ID: " + strconv.Itoa(o.TenantID), + } { + cmd = exec.CommandContext(ctx, "git", "-C", gitDir, "config", "--add", "http.extraHeader", header) + cmd.Stdin = &bytes.Buffer{} + if err := c.runCmd(cmd); err != nil { + return err + } + } + var fetchDuration time.Duration successfullyFetchedCommitsCount := 0 allFetchesSucceeded := true @@ -256,14 +257,13 @@ func fetchRepo(ctx context.Context, gitDir string, o *indexArgs, c gitIndexConfi fetchArgs := []string{ "-C", gitDir, "-c", "protocol.version=2", - "-c", "http.extraHeader=X-Sourcegraph-Actor-UID: internal", - "-c", "http.extraHeader=X-Sourcegraph-Tenant-ID: " + strconv.Itoa(o.TenantID), "fetch", "--depth=1", "--no-tags", } - // If there are no exceptions to MaxFileSize (1MB), we can avoid fetching these large files. + // Git's blob:limit filter excludes blobs whose size is >= the given limit, + // while zoekt indexes files up to and including FileLimit bytes. if len(o.LargeFiles) == 0 { - fetchArgs = append(fetchArgs, "--filter=blob:limit=1m") + fetchArgs = append(fetchArgs, fmt.Sprintf("--filter=blob:limit=%d", int64(MaxFileSize)+1)) } fetchArgs = append(fetchArgs, o.CloneURL) @@ -451,3 +451,38 @@ func tmpGitDir(name string) (string, error) { } return dir, nil } + +// FromProto converts a ZoektIndexOptions proto message into an IndexOptions struct. 
+func (o *IndexOptions) FromProto(x *configv1.ZoektIndexOptions) { + branches := make([]zoekt.RepositoryBranch, 0, len(x.Branches)) + for _, b := range x.GetBranches() { + branches = append(branches, zoekt.RepositoryBranch{ + Name: b.GetName(), + Version: b.GetVersion(), + }) + } + + languageMap := make(map[string]ctags.CTagsParserType) + for _, lang := range x.GetLanguageMap() { + languageMap[lang.GetLanguage()] = ctags.CTagsParserType(lang.GetCtags().Number()) + } + + *o = IndexOptions{ + RepoID: uint32(x.GetRepoId()), + LargeFiles: x.GetLargeFiles(), + Symbols: x.GetSymbols(), + Branches: branches, + Name: x.GetName(), + + Priority: x.GetPriority(), + + Public: x.GetPublic(), + Fork: x.GetFork(), + Archived: x.GetArchived(), + + LanguageMap: languageMap, + ShardConcurrency: x.GetShardConcurrency(), + + TenantID: int(x.TenantId), + } +} diff --git a/cmd/zoekt-sourcegraph-indexserver/index_integration_test.go b/cmd/zoekt-sourcegraph-indexserver/index_integration_test.go new file mode 100644 index 000000000..fc89a4598 --- /dev/null +++ b/cmd/zoekt-sourcegraph-indexserver/index_integration_test.go @@ -0,0 +1,401 @@ +package main + +import ( + "context" + "fmt" + "net" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/sourcegraph/log/logtest" + "github.com/stretchr/testify/require" + + "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/gitindex" + "github.com/sourcegraph/zoekt/query" + "github.com/sourcegraph/zoekt/search" +) + +func TestFetchRepoAndIndex_Integration(t *testing.T) { + requireGitDaemon(t) + + for _, tc := range []struct { + name string + disableGoGitOptimization bool + }{ + {name: "optimized repo open"}, + {name: "legacy repo open", disableGoGitOptimization: true}, + } { + t.Run(tc.name, func(t *testing.T) { + require := require.New(t) + + ctx := context.Background() + fixture := newGitFetchFixture(t) + + if tc.disableGoGitOptimization { + 
t.Setenv("ZOEKT_DISABLE_GOGIT_OPTIMIZATION", "true") + } else { + t.Setenv("ZOEKT_DISABLE_GOGIT_OPTIMIZATION", "false") + } + + sg := &recordingSourcegraph{ + opts: IndexOptions{ + RepoID: 123, + Name: "test/repo", + CloneURL: fixture.cloneURL, + Symbols: false, + Branches: []zoekt.RepositoryBranch{ + {Name: "HEAD", Version: fixture.mainCommit}, + {Name: "dev", Version: fixture.devCommit}, + }, + TenantID: 1, + }, + } + + indexDir := t.TempDir() + server := &Server{ + Sourcegraph: sg, + IndexDir: indexDir, + CPUCount: 1, + IndexConcurrency: 1, + } + + result, err := sg.List(ctx, nil) + require.NoError(err) + + var args *indexArgs + result.IterateIndexOptions(func(opts IndexOptions) { + args = server.indexArgs(opts) + }) + require.NotNil(args) + + gitDir := filepath.Join(t.TempDir(), "fetch.git") + c := gitIndexConfig{ + runCmd: runIntegrationCommand, + findRepositoryMetadata: func(args *indexArgs) (*zoekt.Repository, *zoekt.IndexMetadata, bool, error) { + return args.BuildOptions().FindRepositoryMetadata() + }, + timeout: time.Minute, + } + + require.NoError(fetchRepo(ctx, gitDir, args, c, logtest.Scoped(t))) + assertPartialBareFetch(t, gitDir, fixture) + + require.NoError(setZoektConfig(ctx, gitDir, args, c)) + + updated, err := gitindex.IndexGitRepo(gitIndexOptionsForTest(args, gitDir)) + require.NoError(err) + require.True(updated) + + repository, metadata, ok, err := args.BuildOptions().FindRepositoryMetadata() + require.NoError(err) + require.True(ok) + require.Equal(args.Name, repository.Name) + require.Equal(args.RepoID, repository.ID) + require.Equal(args.TenantID, repository.TenantID) + if diff := cmp.Diff(args.Branches, repository.Branches); diff != "" { + t.Fatalf("branches mismatch (-want +got):\n%s", diff) + } + require.Equal("123", repository.RawConfig["repoid"]) + require.Equal("1", repository.RawConfig["tenantID"]) + + searcher, err := search.NewDirectorySearcher(indexDir) + require.NoError(err) + defer searcher.Close() + + assertSearchContains(t, 
searcher, "smallneedle", "small.txt") + assertSearchContains(t, searcher, "devneedle", "dev.txt") + assertSearchEmpty(t, searcher, "largeneedle") + + require.NoError(updateIndexStatusOnSourcegraph(c, args, sg)) + require.Len(sg.updates, 1) + require.Len(sg.updates[0], 1) + require.Equal(args.RepoID, sg.updates[0][0].RepoID) + require.Equal(metadata.IndexTime.Unix(), sg.updates[0][0].IndexTimeUnix) + if diff := cmp.Diff(args.Branches, sg.updates[0][0].Branches); diff != "" { + t.Fatalf("status branches mismatch (-want +got):\n%s", diff) + } + }) + } +} + +func requireGitDaemon(t *testing.T) { + t.Helper() + + cmd := exec.Command("git", "daemon", "-h") + cmd.Env = gitTestEnv() + output, err := cmd.CombinedOutput() + text := string(output) + + if strings.Contains(text, "usage: git daemon") { + return + } + + if strings.Contains(text, "git: 'daemon' is not a git command") { + t.Skipf("skipping integration test: git daemon is unavailable: %s", strings.TrimSpace(text)) + } + + if err == nil { + return + } + + t.Fatalf("failed to probe git daemon availability: %v\n%s", err, text) +} + +type recordingSourcegraph struct { + opts IndexOptions + updates [][]indexStatus +} + +func (s *recordingSourcegraph) List(ctx context.Context, indexed []uint32) (*SourcegraphListResult, error) { + return &SourcegraphListResult{ + IDs: []uint32{s.opts.RepoID}, + IterateIndexOptions: func(yield func(IndexOptions)) { + yield(s.opts) + }, + }, nil +} + +func (s *recordingSourcegraph) ForceIterateIndexOptions(onSuccess func(IndexOptions), onError func(uint32, error), repos ...uint32) { + onSuccess(s.opts) +} + +func (s *recordingSourcegraph) UpdateIndexStatus(repositories []indexStatus) error { + cp := make([]indexStatus, len(repositories)) + copy(cp, repositories) + s.updates = append(s.updates, cp) + return nil +} + +type gitFetchFixture struct { + cloneURL string + mainCommit string + devCommit string + bigBlob string + daemon *gitDaemon +} + +func newGitFetchFixture(t *testing.T) 
*gitFetchFixture { + t.Helper() + + root := t.TempDir() + worktree := filepath.Join(root, "worktree") + serveRoot := filepath.Join(root, "serve") + remoteDir := filepath.Join(serveRoot, "repo") + + require.NoError(t, os.MkdirAll(worktree, 0o755)) + require.NoError(t, os.MkdirAll(serveRoot, 0o755)) + + runGit(t, root, "init", "-b", "main", worktree) + runGit(t, worktree, "config", "user.name", "Test User") + runGit(t, worktree, "config", "user.email", "test@example.com") + + require.NoError(t, os.WriteFile(filepath.Join(worktree, "small.txt"), []byte("smallneedle\n"), 0o644)) + large := strings.Repeat("x", MaxFileSize+1024) + require.NoError(t, os.WriteFile(filepath.Join(worktree, "big.bin"), []byte("largeneedle\n"+large), 0o644)) + runGit(t, worktree, "add", "small.txt", "big.bin") + runGit(t, worktree, "commit", "-m", "main commit") + + mainCommit := strings.TrimSpace(runGitOutput(t, worktree, "rev-parse", "HEAD")) + bigBlob := strings.TrimSpace(runGitOutput(t, worktree, "rev-parse", "HEAD:big.bin")) + + runGit(t, worktree, "checkout", "-b", "dev") + require.NoError(t, os.WriteFile(filepath.Join(worktree, "dev.txt"), []byte("devneedle\n"), 0o644)) + runGit(t, worktree, "add", "dev.txt") + runGit(t, worktree, "commit", "-m", "dev commit") + + devCommit := strings.TrimSpace(runGitOutput(t, worktree, "rev-parse", "HEAD")) + runGit(t, root, "clone", "--bare", worktree, remoteDir) + runGit(t, remoteDir, "config", "uploadpack.allowFilter", "true") + runGit(t, remoteDir, "config", "uploadpack.allowAnySHA1InWant", "true") + + daemon := startGitDaemon(t, serveRoot) + + return &gitFetchFixture{ + cloneURL: fmt.Sprintf("git://127.0.0.1:%d/repo", daemon.port), + mainCommit: mainCommit, + devCommit: devCommit, + bigBlob: bigBlob, + daemon: daemon, + } +} + +type gitDaemon struct { + cmd *exec.Cmd + logPath string + port int +} + +func startGitDaemon(t *testing.T, serveRoot string) *gitDaemon { + t.Helper() + + port := allocatePort(t) + logFile, err := 
os.CreateTemp(t.TempDir(), "git-daemon-*.log") + require.NoError(t, err) + logPath := logFile.Name() + cmd := exec.Command("git", "daemon", + "--verbose", + "--export-all", + "--reuseaddr", + "--base-path="+serveRoot, + "--listen=127.0.0.1", + fmt.Sprintf("--port=%d", port), + serveRoot, + ) + cmd.Env = gitTestEnv() + cmd.Stdout = logFile + cmd.Stderr = logFile + + require.NoError(t, cmd.Start()) + require.NoError(t, logFile.Close()) + waitForGitDaemon(t, port, logPath) + + daemon := &gitDaemon{cmd: cmd, logPath: logPath, port: port} + t.Cleanup(func() { + if cmd.Process != nil { + _ = cmd.Process.Kill() + } + waitDone := make(chan struct{}) + go func() { + _ = cmd.Wait() + close(waitDone) + }() + + select { + case <-waitDone: + case <-time.After(5 * time.Second): + } + }) + + return daemon +} + +func waitForGitDaemon(t *testing.T, port int, logPath string) { + t.Helper() + + addr := fmt.Sprintf("127.0.0.1:%d", port) + deadline := time.Now().Add(5 * time.Second) + + for time.Now().Before(deadline) { + conn, err := net.DialTimeout("tcp", addr, 100*time.Millisecond) + if err == nil { + _ = conn.Close() + return + } + + time.Sleep(50 * time.Millisecond) + } + + contents, err := os.ReadFile(logPath) + if err != nil { + t.Fatalf("git daemon did not start listening on %s within 5s (failed to read log: %v)", addr, err) + } + + t.Fatalf("git daemon did not start listening on %s within 5s\n%s", addr, contents) +} + +func allocatePort(t *testing.T) int { + t.Helper() + + listener, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + defer listener.Close() + + return listener.Addr().(*net.TCPAddr).Port +} + +func gitIndexOptionsForTest(args *indexArgs, repoDir string) gitindex.Options { + buildOptions := *args.BuildOptions() + buildOptions.RepositoryDescription.Branches = nil + + branches := make([]string, 0, len(args.Branches)) + for _, branch := range args.Branches { + branches = append(branches, branch.Name) + } + + return gitindex.Options{ + RepoDir: 
repoDir, + Submodules: false, + Incremental: args.Incremental, + BuildOptions: buildOptions, + BranchPrefix: "refs/heads/", + Branches: branches, + DeltaShardNumberFallbackThreshold: args.DeltaShardNumberFallbackThreshold, + } +} + +func assertPartialBareFetch(t *testing.T, gitDir string, fixture *gitFetchFixture) { + t.Helper() + require := require.New(t) + + require.Equal(fixture.mainCommit, strings.TrimSpace(runGitOutput(t, gitDir, "rev-parse", "HEAD"))) + require.Equal(fixture.devCommit, strings.TrimSpace(runGitOutput(t, gitDir, "rev-parse", "refs/heads/dev"))) + + promisors, err := filepath.Glob(filepath.Join(gitDir, "objects", "pack", "*.promisor")) + require.NoError(err) + require.NotEmpty(promisors) + + objects := runGitOutput(t, gitDir, "rev-list", "--objects", "--missing=print", "HEAD", "refs/heads/dev") + require.Contains(objects, fixture.mainCommit) + require.Contains(objects, fixture.devCommit) + require.Contains(objects, "?"+fixture.bigBlob) +} + +func assertSearchContains(t *testing.T, searcher zoekt.Searcher, pattern string, wantFile string) { + t.Helper() + require := require.New(t) + + result, err := searcher.Search(context.Background(), &query.Substring{Pattern: pattern}, &zoekt.SearchOptions{}) + require.NoError(err) + require.Len(result.Files, 1) + require.Equal(wantFile, result.Files[0].FileName) +} + +func assertSearchEmpty(t *testing.T, searcher zoekt.Searcher, pattern string) { + t.Helper() + require := require.New(t) + + result, err := searcher.Search(context.Background(), &query.Substring{Pattern: pattern}, &zoekt.SearchOptions{}) + require.NoError(err) + require.Empty(result.Files) +} + +func runIntegrationCommand(cmd *exec.Cmd) error { + cmd.Env = gitTestEnv() + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("%s: %w\n%s", strings.Join(cmd.Args, " "), err, output) + } + return nil +} + +func runGit(t *testing.T, dir string, args ...string) { + t.Helper() + _ = runGitOutput(t, dir, args...) 
+} + +func runGitOutput(t *testing.T, dir string, args ...string) string { + t.Helper() + + cmd := exec.Command("git", args...) + cmd.Dir = dir + cmd.Env = gitTestEnv() + output, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("git %s failed: %v\n%s", strings.Join(args, " "), err, output) + } + + return string(output) +} + +func gitTestEnv() []string { + return append(os.Environ(), + "GIT_CONFIG_GLOBAL=", + "GIT_CONFIG_SYSTEM=", + ) +} diff --git a/cmd/zoekt-sourcegraph-indexserver/index_test.go b/cmd/zoekt-sourcegraph-indexserver/index_test.go index f4f55a83c..45b4fc355 100644 --- a/cmd/zoekt-sourcegraph-indexserver/index_test.go +++ b/cmd/zoekt-sourcegraph-indexserver/index_test.go @@ -8,60 +8,58 @@ import ( "os" "os/exec" "path/filepath" - "sort" "strings" "testing" "time" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/sourcegraph/log/logtest" "github.com/stretchr/testify/require" "google.golang.org/grpc" "google.golang.org/protobuf/testing/protocmp" "google.golang.org/protobuf/types/known/timestamppb" - proto "github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1" - "github.com/sourcegraph/zoekt/ctags" - "github.com/sourcegraph/zoekt/internal/tenant/tenanttest" - - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" + "slices" "github.com/sourcegraph/zoekt" + configv1 "github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/grpc/protos/sourcegraph/zoekt/configuration/v1" + "github.com/sourcegraph/zoekt/internal/ctags" ) func TestIterateIndexOptions_Fingerprint(t *testing.T) { - fingerprintV0 := &proto.Fingerprint{ + fingerprintV0 := &configv1.Fingerprint{ Identifier: 100, GeneratedAt: timestamppb.New(time.Unix(100, 0)), } - fingerprintV1 := &proto.Fingerprint{ + fingerprintV1 := &configv1.Fingerprint{ Identifier: 101, GeneratedAt: timestamppb.New(time.Unix(101, 0)), } - fingerprintV2 := &proto.Fingerprint{ + fingerprintV2 := 
&configv1.Fingerprint{ Identifier: 102, GeneratedAt: timestamppb.New(time.Unix(102, 0)), } - mkSearchConfigurationResponse := func(fingerprint *proto.Fingerprint, repoIDs ...int32) *proto.SearchConfigurationResponse { - repositories := make([]*proto.ZoektIndexOptions, 0, len(repoIDs)) + mkSearchConfigurationResponse := func(fingerprint *configv1.Fingerprint, repoIDs ...int32) *configv1.SearchConfigurationResponse { + repositories := make([]*configv1.ZoektIndexOptions, 0, len(repoIDs)) for _, repoID := range repoIDs { - repositories = append(repositories, &proto.ZoektIndexOptions{ + repositories = append(repositories, &configv1.ZoektIndexOptions{ RepoId: repoID, }) } - return &proto.SearchConfigurationResponse{ + return &configv1.SearchConfigurationResponse{ UpdatedOptions: repositories, Fingerprint: fingerprint, } } grpcClient := &mockGRPCClient{ - mockList: func(_ context.Context, in *proto.ListRequest, opts ...grpc.CallOption) (*proto.ListResponse, error) { - return &proto.ListResponse{ + mockList: func(_ context.Context, in *configv1.ListRequest, opts ...grpc.CallOption) (*configv1.ListResponse, error) { + return &configv1.ListResponse{ RepoIds: []int32{1, 2, 3}, }, nil }, @@ -77,8 +75,8 @@ func TestIterateIndexOptions_Fingerprint(t *testing.T) { type step struct { name string - wantFingerprint *proto.Fingerprint - returnFingerprint *proto.Fingerprint + wantFingerprint *configv1.Fingerprint + returnFingerprint *configv1.Fingerprint returnErr error skipCheckingRepoIDs bool } @@ -110,7 +108,7 @@ func TestIterateIndexOptions_Fingerprint(t *testing.T) { } { t.Run(step.name, func(t *testing.T) { called := false - grpcClient.mockSearchConfiguration = func(_ context.Context, in *proto.SearchConfigurationRequest, opts ...grpc.CallOption) (*proto.SearchConfigurationResponse, error) { + grpcClient.mockSearchConfiguration = func(_ context.Context, in *configv1.SearchConfigurationRequest, opts ...grpc.CallOption) (*configv1.SearchConfigurationResponse, error) { called = 
true diff := cmp.Diff(step.wantFingerprint, in.GetFingerprint(), protocmp.Transform()) @@ -139,14 +137,10 @@ func TestIterateIndexOptions_Fingerprint(t *testing.T) { return } - sort.Slice(iteratedIDs, func(i, j int) bool { - return iteratedIDs[i] < iteratedIDs[j] - }) + slices.Sort(iteratedIDs) expectedIDs := []uint32{1, 2, 3} - sort.Slice(expectedIDs, func(i, j int) bool { - return expectedIDs[i] < expectedIDs[j] - }) + slices.Sort(expectedIDs) if diff := cmp.Diff(expectedIDs, iteratedIDs); diff != "" { t.Fatalf("unexpected repo ids (-want +got):\n%s", diff) @@ -159,7 +153,7 @@ func TestGetIndexOptions(t *testing.T) { type testCase struct { name string - response *proto.SearchConfigurationResponse + response *configv1.SearchConfigurationResponse want *IndexOptions wantErr string } @@ -167,8 +161,8 @@ func TestGetIndexOptions(t *testing.T) { for _, tc := range []testCase{ { name: "symbols, large files", - response: &proto.SearchConfigurationResponse{ - UpdatedOptions: []*proto.ZoektIndexOptions{ + response: &configv1.SearchConfigurationResponse{ + UpdatedOptions: []*configv1.ZoektIndexOptions{ { Symbols: true, LargeFiles: []string{"foo", "bar"}, @@ -182,8 +176,8 @@ func TestGetIndexOptions(t *testing.T) { }, { name: "no symbols , large files", - response: &proto.SearchConfigurationResponse{ - UpdatedOptions: []*proto.ZoektIndexOptions{ + response: &configv1.SearchConfigurationResponse{ + UpdatedOptions: []*configv1.ZoektIndexOptions{ { Symbols: true, LargeFiles: []string{"foo", "bar"}, @@ -204,8 +198,8 @@ func TestGetIndexOptions(t *testing.T) { { name: "symbols", - response: &proto.SearchConfigurationResponse{ - UpdatedOptions: []*proto.ZoektIndexOptions{ + response: &configv1.SearchConfigurationResponse{ + UpdatedOptions: []*configv1.ZoektIndexOptions{ { Symbols: true, }, @@ -217,8 +211,8 @@ func TestGetIndexOptions(t *testing.T) { }, { name: "repoID", - response: &proto.SearchConfigurationResponse{ - UpdatedOptions: []*proto.ZoektIndexOptions{ + response: 
&configv1.SearchConfigurationResponse{ + UpdatedOptions: []*configv1.ZoektIndexOptions{ { RepoId: 123, }, @@ -230,8 +224,8 @@ func TestGetIndexOptions(t *testing.T) { }, { name: "error", - response: &proto.SearchConfigurationResponse{ - UpdatedOptions: []*proto.ZoektIndexOptions{ + response: &configv1.SearchConfigurationResponse{ + UpdatedOptions: []*configv1.ZoektIndexOptions{ { Error: "boom", }, @@ -243,7 +237,7 @@ func TestGetIndexOptions(t *testing.T) { } { called := false mockClient := &mockGRPCClient{ - mockSearchConfiguration: func(_ context.Context, _ *proto.SearchConfigurationRequest, _ ...grpc.CallOption) (*proto.SearchConfigurationResponse, error) { + mockSearchConfiguration: func(_ context.Context, _ *configv1.SearchConfigurationRequest, _ ...grpc.CallOption) (*configv1.SearchConfigurationResponse, error) { called = true return tc.response, nil }, @@ -296,7 +290,7 @@ func TestGetIndexOptions(t *testing.T) { called := false mockClient := &mockGRPCClient{ - mockSearchConfiguration: func(_ context.Context, _ *proto.SearchConfigurationRequest, _ ...grpc.CallOption) (*proto.SearchConfigurationResponse, error) { + mockSearchConfiguration: func(_ context.Context, _ *configv1.SearchConfigurationRequest, _ ...grpc.CallOption) (*configv1.SearchConfigurationResponse, error) { called = true return nil, nil }, @@ -334,9 +328,9 @@ func TestGetIndexOptions(t *testing.T) { } }) - var response *proto.SearchConfigurationResponse + var response *configv1.SearchConfigurationResponse mockClient := &mockGRPCClient{ - mockSearchConfiguration: func(_ context.Context, req *proto.SearchConfigurationRequest, _ ...grpc.CallOption) (*proto.SearchConfigurationResponse, error) { + mockSearchConfiguration: func(_ context.Context, req *configv1.SearchConfigurationRequest, _ ...grpc.CallOption) (*configv1.SearchConfigurationResponse, error) { if len(req.GetRepoIds()) == 0 || req.GetRepoIds()[0] != 123 { return nil, errors.New("invalid repo id") } @@ -347,12 +341,12 @@ func 
TestGetIndexOptions(t *testing.T) { sg := newSourcegraphClient(&url.URL{Path: "/"}, "", mockClient, WithBatchSize(0)) cases := []struct { - Response *proto.SearchConfigurationResponse + Response *configv1.SearchConfigurationResponse *IndexOptions }{ { - Response: &proto.SearchConfigurationResponse{ - UpdatedOptions: []*proto.ZoektIndexOptions{ + Response: &configv1.SearchConfigurationResponse{ + UpdatedOptions: []*configv1.ZoektIndexOptions{ { Symbols: true, LargeFiles: []string{"foo", "bar"}, @@ -368,8 +362,8 @@ func TestGetIndexOptions(t *testing.T) { }, { - Response: &proto.SearchConfigurationResponse{ - UpdatedOptions: []*proto.ZoektIndexOptions{ + Response: &configv1.SearchConfigurationResponse{ + UpdatedOptions: []*configv1.ZoektIndexOptions{ { Symbols: false, LargeFiles: []string{"foo", "bar"}, @@ -384,12 +378,12 @@ func TestGetIndexOptions(t *testing.T) { }, { - Response: &proto.SearchConfigurationResponse{}, + Response: &configv1.SearchConfigurationResponse{}, }, { - Response: &proto.SearchConfigurationResponse{ - UpdatedOptions: []*proto.ZoektIndexOptions{ + Response: &configv1.SearchConfigurationResponse{ + UpdatedOptions: []*configv1.ZoektIndexOptions{ { Symbols: true, }, @@ -403,8 +397,8 @@ func TestGetIndexOptions(t *testing.T) { }, { - Response: &proto.SearchConfigurationResponse{ - UpdatedOptions: []*proto.ZoektIndexOptions{ + Response: &configv1.SearchConfigurationResponse{ + UpdatedOptions: []*configv1.ZoektIndexOptions{ { RepoId: 123, }, @@ -418,8 +412,8 @@ func TestGetIndexOptions(t *testing.T) { }, { - Response: &proto.SearchConfigurationResponse{ - UpdatedOptions: []*proto.ZoektIndexOptions{ + Response: &configv1.SearchConfigurationResponse{ + UpdatedOptions: []*configv1.ZoektIndexOptions{ { Error: "boom", }, @@ -456,7 +450,7 @@ func TestGetIndexOptions(t *testing.T) { // Special case our fingerprint API which doesn't return anything if the // repo hasn't changed. 
t.Run("unchanged", func(t *testing.T) { - response = &proto.SearchConfigurationResponse{} + response = &configv1.SearchConfigurationResponse{} got := false var err error @@ -475,76 +469,6 @@ func TestGetIndexOptions(t *testing.T) { }) } -func TestIndexTenant(t *testing.T) { - tenanttest.MockEnforce(t) - - cases := []struct { - name string - args indexArgs - mockRepositoryMetadata *zoekt.Repository - want []string - }{ - { - name: "prefix", - args: indexArgs{ - IndexOptions: IndexOptions{ - RepoID: 13, - Name: "test/repo", - CloneURL: "http://api.test/.internal/git/test/repo", - Branches: []zoekt.RepositoryBranch{{Name: "HEAD", Version: "deadbeef"}}, - TenantID: 42, - }, - }, - want: []string{ - "git -c init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git", - "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 42 fetch --depth=1 --no-tags --filter=blob:limit=1m http://api.test/.internal/git/test/repo deadbeef", - "git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef", - "git -C $TMPDIR/test%2Frepo.git config zoekt.archived 0", - "git -C $TMPDIR/test%2Frepo.git config zoekt.fork 0", - "git -C $TMPDIR/test%2Frepo.git config zoekt.latestCommitDate 1", - "git -C $TMPDIR/test%2Frepo.git config zoekt.name test/repo", - "git -C $TMPDIR/test%2Frepo.git config zoekt.priority 0", - "git -C $TMPDIR/test%2Frepo.git config zoekt.public 0", - "git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 13", - "git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 42", - "zoekt-git-index -submodules=false -branches HEAD -disable_ctags -shard_prefix 000000042_000000013 $TMPDIR/test%2Frepo.git", - }, - }, - } - - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - var got []string - runCmd := func(c *exec.Cmd) error { - cmd := strings.Join(c.Args, " ") - cmd = strings.ReplaceAll(cmd, filepath.Clean(os.TempDir()), "$TMPDIR") - got = append(got, 
cmd) - return nil - } - - findRepositoryMetadata := func(args *indexArgs) (repository *zoekt.Repository, metadata *zoekt.IndexMetadata, ok bool, err error) { - if tc.mockRepositoryMetadata == nil { - return args.BuildOptions().FindRepositoryMetadata() - } - - return tc.mockRepositoryMetadata, &zoekt.IndexMetadata{}, true, nil - } - - c := gitIndexConfig{ - runCmd: runCmd, - findRepositoryMetadata: findRepositoryMetadata, - } - - if err := gitIndex(c, &tc.args, sourcegraphNop{}, logtest.Scoped(t)); err != nil { - t.Fatal(err) - } - if !cmp.Equal(got, tc.want) { - t.Errorf("git mismatch (-want +got):\n%s", cmp.Diff(tc.want, got, splitargs)) - } - }) - } -} - func TestIndex(t *testing.T) { cases := []struct { name string @@ -563,7 +487,9 @@ func TestIndex(t *testing.T) { }, want: []string{ "git -c init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git", - "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 42 fetch --depth=1 --no-tags --filter=blob:limit=1m http://api.test/.internal/git/test/repo deadbeef", + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Actor-UID: internal", + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Tenant-ID: 42", + "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 fetch --depth=1 --no-tags --filter=blob:limit=1048577 http://api.test/.internal/git/test/repo deadbeef", "git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef", "git -C $TMPDIR/test%2Frepo.git config zoekt.archived 0", "git -C $TMPDIR/test%2Frepo.git config zoekt.fork 0", @@ -573,7 +499,7 @@ func TestIndex(t *testing.T) { "git -C $TMPDIR/test%2Frepo.git config zoekt.public 0", "git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 0", "git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 42", - "zoekt-git-index -submodules=false -branches HEAD -disable_ctags $TMPDIR/test%2Frepo.git", + 
"zoekt-git-index -submodules=false -branches HEAD -file_limit 1048576 -disable_ctags $TMPDIR/test%2Frepo.git", }, }, { name: "minimal-id", @@ -588,7 +514,9 @@ func TestIndex(t *testing.T) { }, want: []string{ "git -c init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git", - "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 1 fetch --depth=1 --no-tags --filter=blob:limit=1m http://api.test/.internal/git/test/repo deadbeef", + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Actor-UID: internal", + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Tenant-ID: 1", + "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 fetch --depth=1 --no-tags --filter=blob:limit=1048577 http://api.test/.internal/git/test/repo deadbeef", "git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef", "git -C $TMPDIR/test%2Frepo.git config zoekt.archived 0", "git -C $TMPDIR/test%2Frepo.git config zoekt.fork 0", @@ -598,7 +526,7 @@ func TestIndex(t *testing.T) { "git -C $TMPDIR/test%2Frepo.git config zoekt.public 0", "git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 123", "git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 1", - "zoekt-git-index -submodules=false -branches HEAD -disable_ctags $TMPDIR/test%2Frepo.git", + "zoekt-git-index -submodules=false -branches HEAD -file_limit 1048576 -disable_ctags $TMPDIR/test%2Frepo.git", }, }, { name: "all", @@ -606,7 +534,6 @@ func TestIndex(t *testing.T) { Incremental: true, IndexDir: "/data/index", Parallelism: 4, - FileLimit: 123, IndexOptions: IndexOptions{ Name: "test/repo", CloneURL: "http://api.test/.internal/git/test/repo", @@ -621,7 +548,9 @@ func TestIndex(t *testing.T) { }, want: []string{ "git -c init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git", - "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c 
http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 1 fetch --depth=1 --no-tags http://api.test/.internal/git/test/repo deadbeef feebdaed", + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Actor-UID: internal", + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Tenant-ID: 1", + "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 fetch --depth=1 --no-tags http://api.test/.internal/git/test/repo deadbeef feebdaed", "git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef", "git -C $TMPDIR/test%2Frepo.git update-ref refs/heads/dev feebdaed", "git -C $TMPDIR/test%2Frepo.git config zoekt.archived 0", @@ -633,7 +562,7 @@ func TestIndex(t *testing.T) { "git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 0", "git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 1", "zoekt-git-index -submodules=false -incremental -branches HEAD,dev " + - "-file_limit 123 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " + + "-file_limit 1048576 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " + "$TMPDIR/test%2Frepo.git", }, }, { @@ -642,7 +571,6 @@ func TestIndex(t *testing.T) { Incremental: true, IndexDir: "/data/index", Parallelism: 4, - FileLimit: 123, UseDelta: true, IndexOptions: IndexOptions{ RepoID: 0, @@ -670,7 +598,9 @@ func TestIndex(t *testing.T) { }, want: []string{ "git -c init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git", - "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 1 fetch --depth=1 --no-tags http://api.test/.internal/git/test/repo deadbeef feebdaed 12345678 oldhead olddev oldrelease", + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Actor-UID: internal", + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader 
X-Sourcegraph-Tenant-ID: 1", + "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 fetch --depth=1 --no-tags http://api.test/.internal/git/test/repo deadbeef feebdaed 12345678 oldhead olddev oldrelease", "git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef", "git -C $TMPDIR/test%2Frepo.git update-ref refs/heads/dev feebdaed", "git -C $TMPDIR/test%2Frepo.git update-ref refs/heads/release 12345678", @@ -683,7 +613,7 @@ func TestIndex(t *testing.T) { "git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 0", "git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 1", "zoekt-git-index -submodules=false -incremental -branches HEAD,dev,release " + - "-delta -delta_threshold 22 -file_limit 123 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " + + "-delta -delta_threshold 22 -file_limit 1048576 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " + "$TMPDIR/test%2Frepo.git", }, }} @@ -692,6 +622,10 @@ func TestIndex(t *testing.T) { t.Run(tc.name, func(t *testing.T) { var got []string runCmd := func(c *exec.Cmd) error { + if c.Env != nil { + t.Fatal("expected nil Env for command. 
Tenant Enforcement relies on us inheritting the parent process environment.") + } + cmd := strings.Join(c.Args, " ") cmd = strings.ReplaceAll(cmd, filepath.Clean(os.TempDir()), "$TMPDIR") got = append(got, cmd) @@ -711,7 +645,7 @@ func TestIndex(t *testing.T) { findRepositoryMetadata: findRepositoryMetadata, } - if err := gitIndex(c, &tc.args, sourcegraphNop{}, logtest.Scoped(t)); err != nil { + if err := gitIndex(context.Background(), c, &tc.args, sourcegraphNop{}, logtest.Scoped(t)); err != nil { t.Fatal(err) } if !cmp.Equal(got, tc.want) { @@ -726,12 +660,12 @@ var splitargs = cmpopts.AcyclicTransformer("splitargs", func(cmd string) []strin }) type mockGRPCClient struct { - mockSearchConfiguration func(context.Context, *proto.SearchConfigurationRequest, ...grpc.CallOption) (*proto.SearchConfigurationResponse, error) - mockList func(context.Context, *proto.ListRequest, ...grpc.CallOption) (*proto.ListResponse, error) - mockUpdateIndexStatus func(context.Context, *proto.UpdateIndexStatusRequest, ...grpc.CallOption) (*proto.UpdateIndexStatusResponse, error) + mockSearchConfiguration func(context.Context, *configv1.SearchConfigurationRequest, ...grpc.CallOption) (*configv1.SearchConfigurationResponse, error) + mockList func(context.Context, *configv1.ListRequest, ...grpc.CallOption) (*configv1.ListResponse, error) + mockUpdateIndexStatus func(context.Context, *configv1.UpdateIndexStatusRequest, ...grpc.CallOption) (*configv1.UpdateIndexStatusResponse, error) } -func (m *mockGRPCClient) SearchConfiguration(ctx context.Context, in *proto.SearchConfigurationRequest, opts ...grpc.CallOption) (*proto.SearchConfigurationResponse, error) { +func (m *mockGRPCClient) SearchConfiguration(ctx context.Context, in *configv1.SearchConfigurationRequest, opts ...grpc.CallOption) (*configv1.SearchConfigurationResponse, error) { if m.mockSearchConfiguration != nil { return m.mockSearchConfiguration(ctx, in, opts...) 
} @@ -739,7 +673,7 @@ func (m *mockGRPCClient) SearchConfiguration(ctx context.Context, in *proto.Sear return nil, fmt.Errorf("mock RPC SearchConfiguration not implemented") } -func (m *mockGRPCClient) List(ctx context.Context, in *proto.ListRequest, opts ...grpc.CallOption) (*proto.ListResponse, error) { +func (m *mockGRPCClient) List(ctx context.Context, in *configv1.ListRequest, opts ...grpc.CallOption) (*configv1.ListResponse, error) { if m.mockList != nil { return m.mockList(ctx, in, opts...) } @@ -747,7 +681,7 @@ func (m *mockGRPCClient) List(ctx context.Context, in *proto.ListRequest, opts . return nil, fmt.Errorf("mock RPC List not implemented") } -func (m *mockGRPCClient) UpdateIndexStatus(ctx context.Context, in *proto.UpdateIndexStatusRequest, opts ...grpc.CallOption) (*proto.UpdateIndexStatusResponse, error) { +func (m *mockGRPCClient) UpdateIndexStatus(ctx context.Context, in *configv1.UpdateIndexStatusRequest, opts ...grpc.CallOption) (*configv1.UpdateIndexStatusResponse, error) { if m.mockUpdateIndexStatus != nil { return m.mockUpdateIndexStatus(ctx, in, opts...) } @@ -755,7 +689,7 @@ func (m *mockGRPCClient) UpdateIndexStatus(ctx context.Context, in *proto.Update return nil, fmt.Errorf("mock RPC UpdateIndexStatus not implemented") } -var _ proto.ZoektConfigurationServiceClient = &mockGRPCClient{} +var _ configv1.ZoektConfigurationServiceClient = &mockGRPCClient{} // Tests whether we can set git config values without error. func TestSetZoektConfig(t *testing.T) { diff --git a/cmd/zoekt-sourcegraph-indexserver/main.go b/cmd/zoekt-sourcegraph-indexserver/main.go index 5c7d05f20..f7793024d 100644 --- a/cmd/zoekt-sourcegraph-indexserver/main.go +++ b/cmd/zoekt-sourcegraph-indexserver/main.go @@ -1,5 +1,18 @@ -// Command zoekt-sourcegraph-indexserver periodically reindexes enabled -// repositories on sourcegraph +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Command zoekt-sourcegraph-indexserver periodically reindexes repositories +// from a Sourcegraph instance. It uses a "pull-based" design, where it periodically +// reaches out to the Sourcegraph instance for the list of repositories to reindex. package main import ( @@ -24,6 +37,7 @@ import ( "os/signal" "path/filepath" "runtime" + "slices" "sort" "strconv" "strings" @@ -38,22 +52,27 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" sglog "github.com/sourcegraph/log" "github.com/sourcegraph/mountinfo" + + "github.com/sourcegraph/zoekt" + configv1 "github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/grpc/protos/sourcegraph/zoekt/configuration/v1" + indexserverv1 "github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/grpc/protos/zoekt/indexserver/v1" + "github.com/sourcegraph/zoekt/grpc/defaults" + "github.com/sourcegraph/zoekt/grpc/grpcutil" + "github.com/sourcegraph/zoekt/grpc/internalerrs" + "github.com/sourcegraph/zoekt/grpc/messagesize" + "github.com/sourcegraph/zoekt/index" + "github.com/sourcegraph/zoekt/internal/debugserver" + "github.com/sourcegraph/zoekt/internal/profiler" + "github.com/sourcegraph/zoekt/internal/tenant" + "go.uber.org/automaxprocs/maxprocs" + "go.uber.org/multierr" "golang.org/x/net/trace" "golang.org/x/sys/unix" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/metadata" - - "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" - proto 
"github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1" - "github.com/sourcegraph/zoekt/debugserver" - "github.com/sourcegraph/zoekt/grpc/internalerrs" - "github.com/sourcegraph/zoekt/grpc/messagesize" - "github.com/sourcegraph/zoekt/internal/profiler" - "github.com/sourcegraph/zoekt/internal/tenant" ) var ( @@ -111,7 +130,7 @@ var ( metricIndexIncrementalIndexState = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "index_incremental_index_state", Help: "A count of the state on disk vs what we want to build. See zoekt/build.IndexState.", - }, []string{"state"}) // state is build.IndexState + }, []string{"state"}) // state is index.IndexState metricNumIndexed = promauto.NewGauge(prometheus.GaugeOpts{ Name: "index_num_indexed", @@ -150,8 +169,7 @@ var ( }) ) -// 1 MB; match https://sourcegraph.sgdev.org/github.com/sourcegraph/sourcegraph/-/blob/cmd/symbols/internal/symbols/search.go#L22 -// NOTE: if you change this, you must also update gitIndex to use the same value when fetching the repo. +// 1 MB; match https://sourcegraph.sourcegraph.com/r/github.com/sourcegraph/sourcegraph/-/blob/cmd/searcher/internal/search/store.go?L32 const MaxFileSize = 1 << 20 // set of repositories that we want to capture separate indexing metrics for @@ -173,7 +191,11 @@ type Server struct { logger sglog.Logger Sourcegraph Sourcegraph - BatchSize int + + // rootURL is the root URL of the Sourcegraph instance. + rootURL *url.URL + + BatchSize int // IndexDir is the index directory to use. IndexDir string @@ -181,6 +203,9 @@ type Server struct { // IndexConcurrency is the number of repositories we index at once. IndexConcurrency int + // indexSemaphore limits the number of concurrent index operations + indexSemaphore chan struct{} + // Interval is how often we sync with Sourcegraph. Interval time.Duration @@ -213,6 +238,8 @@ type Server struct { // timeout defines how long the index server waits before killing an indexing job. 
timeout time.Duration + + indexserverv1.UnimplementedSourcegraphIndexserverServiceServer } var ( @@ -324,7 +351,27 @@ func (sb *synchronizedBuffer) String() string { // pauseFileName if present in IndexDir will stop index jobs from // running. This is to make it possible to experiment with the content of the // IndexDir without the indexserver writing to it. -const pauseFileName = "PAUSE" +const ( + pauseFileName = "PAUSE" + pauseEnvVar = "SRC_PAUSE_INDEXING" +) + +// isIndexingPaused checks if indexing should be paused based on either: +// 1. The presence of a PAUSE file in the index directory +// 2. The SRC_PAUSE_INDEXING environment variable being set to true +func isIndexingPaused(indexDir string) (bool, string) { + // Check for PAUSE file first + if b, err := os.ReadFile(filepath.Join(indexDir, pauseFileName)); err == nil { + return true, fmt.Sprintf("indexserver manually paused via PAUSE file: %s", string(bytes.TrimSpace(b))) + } + + // Then check environment variable + if getEnvWithDefaultBool(pauseEnvVar, false) { + return true, fmt.Sprintf("indexserver paused via %s environment variable", pauseEnvVar) + } + + return false, "" +} // Run the sync loop. This blocks forever. func (s *Server) Run() { @@ -334,11 +381,10 @@ func (s *Server) Run() { go func() { // We update the list of indexed repos every Interval. To speed up manual // testing we also listen for SIGUSR1 to trigger updates. - // // "pkill -SIGUSR1 zoekt-sourcegra" for range jitterTicker(s.Interval, unix.SIGUSR1) { - if b, err := os.ReadFile(filepath.Join(s.IndexDir, pauseFileName)); err == nil { - infoLog.Printf("indexserver manually paused via PAUSE file: %s", string(bytes.TrimSpace(b))) + if paused, msg := isIndexingPaused(s.IndexDir); paused { + infoLog.Printf("%s", msg) continue } @@ -376,7 +422,7 @@ func (s *Server) Run() { missing := s.queue.Bump(repos.IDs) s.Sourcegraph.ForceIterateIndexOptions(s.queue.AddOrUpdate, func(uint32, error) {}, missing...) 
- setCompoundShardCounter(s.IndexDir) + setShardsCounter(s.IndexDir) <-cleanupDone } @@ -398,7 +444,7 @@ func (s *Server) Run() { } }() - for i := 0; i < s.IndexConcurrency; i++ { + for range s.IndexConcurrency { go s.processQueue() } @@ -413,7 +459,7 @@ func formatListUint32(v []uint32, m int) string { } sb := strings.Builder{} - for i := 0; i < m; i++ { + for i := range m { fmt.Fprintf(&sb, "%d, ", v[i]) } @@ -426,7 +472,7 @@ func formatListUint32(v []uint32, m int) string { func (s *Server) processQueue() { for { - if _, err := os.Stat(filepath.Join(s.IndexDir, pauseFileName)); err == nil { + if paused, _ := isIndexingPaused(s.IndexDir); paused { time.Sleep(time.Second) continue } @@ -445,7 +491,7 @@ func (s *Server) processQueue() { // recording time taken while merging/cleanup runs. start := time.Now() - state, err := s.Index(args) + state, err := s.index(context.Background(), args) elapsed := time.Since(start) metricIndexDuration.WithLabelValues(string(state), repoNameForMetric(opts.Name)).Observe(elapsed.Seconds()) @@ -547,7 +593,7 @@ func jitterTicker(d time.Duration, sig ...os.Signal) <-chan struct{} { } // Index starts an index job for repo name at commit. -func (s *Server) Index(args *indexArgs) (state indexState, err error) { +func (s *Server) index(ctx context.Context, args *indexArgs) (state indexState, err error) { tr := trace.New("index", args.Name) tr.SetMaxEvents(30) // Ensure we capture all indexing events @@ -596,11 +642,11 @@ func (s *Server) Index(args *indexArgs) (state indexState, err error) { metricIndexIncrementalIndexState.WithLabelValues(string(incrementalState)).Inc() switch incrementalState { - case build.IndexStateEqual: + case index.IndexStateEqual: debugLog.Printf("%s index already up to date. Shard=%s", args.String(), fn) return indexStateNoop, nil - case build.IndexStateMeta: + case index.IndexStateMeta: infoLog.Printf("updating index.meta %s", args.String()) // TODO(stefan) handle mergeMeta for tenant id. 
@@ -610,7 +656,7 @@ func (s *Server) Index(args *indexArgs) (state indexState, err error) { return indexStateSuccessMeta, nil } - case build.IndexStateCorrupt: + case index.IndexStateCorrupt: infoLog.Printf("falling back to full update: corrupt index: %s", args.String()) } } @@ -629,7 +675,7 @@ func (s *Server) Index(args *indexArgs) (state indexState, err error) { timeout: s.timeout, } - err = gitIndex(c, args, s.Sourcegraph, s.logger) + err = gitIndex(ctx, c, args, s.Sourcegraph, s.logger) if err != nil { return indexStateFail, err } @@ -685,7 +731,6 @@ func (s *Server) indexArgs(opts IndexOptions) *indexArgs { IndexDir: s.IndexDir, Parallelism: parallelism, Incremental: true, - FileLimit: MaxFileSize, ShardMerging: s.shardMerging, } } @@ -724,7 +769,7 @@ func createEmptyShard(args *indexArgs) error { return nil } - builder, err := build.NewBuilder(*bo) + builder, err := index.NewBuilder(*bo) if err != nil { return err } @@ -807,12 +852,12 @@ func (s *Server) handleRoot(w http.ResponseWriter, r *http.Request) { // ?id= indexMsg := "" if v := values.Get("id"); v != "" { - id, err := strconv.Atoi(v) + id, err := strconv.ParseUint(v, 10, 32) if err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return } - indexMsg, _ = s.forceIndex(uint32(id)) + indexMsg, _ = s.forceIndex(r.Context(), uint32(id)) } // ?show_repos= @@ -861,13 +906,13 @@ func (s *Server) handleReindex(w http.ResponseWriter, r *http.Request) { return } - id, err := strconv.Atoi(r.Form.Get("repo")) + repoID, err := strconv.ParseUint(r.Form.Get("repo"), 10, 32) if err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return } - go func() { s.forceIndex(uint32(id)) }() + go func() { s.forceIndex(r.Context(), uint32(repoID)) }() // 202 Accepted w.WriteHeader(http.StatusAccepted) @@ -999,7 +1044,7 @@ func (s *Server) handleDebugIndexed(w http.ResponseWriter, r *http.Request) { // forceIndex will run the index job for repo name now. 
It will return always // return a string explaining what it did, even if it failed. -func (s *Server) forceIndex(id uint32) (string, error) { +func (s *Server) forceIndex(ctx context.Context, id uint32) (string, error) { var opts IndexOptions var err error s.Sourcegraph.ForceIterateIndexOptions(func(o IndexOptions) { @@ -1016,7 +1061,7 @@ func (s *Server) forceIndex(id uint32) (string, error) { var state indexState ran := s.muIndexDir.With(opts.Name, func() { - state, err = s.Index(args) + state, err = s.index(ctx, args) }) if !ran { return fmt.Sprintf("index job for repository already running: %s", args), nil @@ -1027,6 +1072,57 @@ func (s *Server) forceIndex(id uint32) (string, error) { return fmt.Sprintf("Indexed %s with state %s", args.String(), state), nil } +// DeleteAllData deletes all shards in the index and trash dir belonging to the +// tenant associated with the request. The deletion is best-effort, which means +// we will delete as much as possible. If no error is returned, the caller can +// be certain that all data has been deleted. +func (s *Server) DeleteAllData(ctx context.Context, _ *indexserverv1.DeleteAllDataRequest) (*indexserverv1.DeleteAllDataResponse, error) { + tnt, err := tenant.FromContext(ctx) + if err != nil { + return nil, err + } + s.logger.Warn("DeleteAllData", sglog.Int("tenant_id", tnt.ID())) + + var merr error + s.muIndexDir.Global(func() { + // First, explode all compound shards that have repos from the tenant in + // question. Because we hold the global lock, we can be sure that no new + // merges start while we do this. + if err := s.explodeTenantCompoundShards(ctx, func(path string) error { + // We call explode in a separate process to protect indexserver. 
+ cmd := defaultExplodeCmd(path) + + stdoutBuf := &bytes.Buffer{} + stderrBuf := &bytes.Buffer{} + cmd.Stdout = stdoutBuf + cmd.Stderr = stderrBuf + + err := cmd.Run() + if err != nil { + errorLog.Printf("explode failed: %v (stderr: %s)", err, stderrBuf.String()) + return err + } + + infoLog.Printf("exploded shard: %s", stdoutBuf.String()) + + return nil + }); err != nil { + merr = multierr.Append(merr, err) + } + + // Invariant: all shards from the tenant are simple shards. + + if err := purgeTenantShards(ctx, s.IndexDir); err != nil { + merr = multierr.Append(merr, err) + } + if err := purgeTenantShards(ctx, filepath.Join(s.IndexDir, ".trash")); err != nil { + merr = multierr.Append(merr, err) + } + }) + + return &indexserverv1.DeleteAllDataResponse{}, merr +} + func listIndexed(indexDir string) []uint32 { index := getShards(indexDir) metricNumIndexed.Set(float64(len(index))) @@ -1034,9 +1130,7 @@ func listIndexed(indexDir string) []uint32 { for id := range index { repoIDs = append(repoIDs, id) } - sort.Slice(repoIDs, func(i, j int) bool { - return repoIDs[i] < repoIDs[j] - }) + slices.Sort(repoIDs) return repoIDs } @@ -1071,7 +1165,7 @@ func setupTmpDir(logger sglog.Logger, main bool, index string) error { } func printMetaData(fn string) error { - repo, indexMeta, err := zoekt.ReadMetadataPath(fn) + repo, indexMeta, err := index.ReadMetadataPath(fn) if err != nil { return err } @@ -1094,12 +1188,12 @@ func printShardStats(fn string) error { return err } - iFile, err := zoekt.NewIndexFile(f) + iFile, err := index.NewIndexFile(f) if err != nil { return err } - return zoekt.PrintNgramStats(iFile) + return index.PrintNgramStats(iFile) } func srcLogLevelIsDebug() bool { @@ -1208,13 +1302,21 @@ func joinStringSet(set map[string]struct{}, sep string) string { return strings.Join(xs, sep) } -func setCompoundShardCounter(indexDir string) { - fns, err := filepath.Glob(filepath.Join(indexDir, "compound-*.zoekt")) +func setShardsCounter(indexDir string) { + fns, err := 
filepath.Glob(filepath.Join(indexDir, "*.zoekt")) if err != nil { - errorLog.Printf("setCompoundShardCounter: %s\n", err) + errorLog.Printf("setShardsCounter: %s\n", err) return } - metricNumberCompoundShards.Set(float64(len(fns))) + metricNumberShards.Set(float64(len(fns))) + + compoundFns := make([]string, 0, len(fns)) + for _, fn := range fns { + if strings.HasPrefix(filepath.Base(fn), "compound-") { + compoundFns = append(compoundFns, fn) + } + } + metricNumberCompoundShards.Set(float64(len(compoundFns))) } func rootCmd() *ffcli.Command { @@ -1265,9 +1367,9 @@ func (rc *rootConfig) registerRootFlags(fs *flag.FlagSet) { fs.StringVar(&rc.root, "sourcegraph_url", os.Getenv("SRC_FRONTEND_INTERNAL"), "http://sourcegraph-frontend-internal or http://localhost:3090. If a path to a directory, we fake the Sourcegraph API and index all repos rooted under path.") fs.DurationVar(&rc.interval, "interval", time.Minute, "sync with sourcegraph this often") fs.Int64Var(&rc.indexConcurrency, "index_concurrency", getEnvWithDefaultInt64("SRC_INDEX_CONCURRENCY", 1), "the number of repos to index concurrently") - fs.StringVar(&rc.index, "index", getEnvWithDefaultString("DATA_DIR", build.DefaultDir), "set index directory to use") + fs.StringVar(&rc.index, "index", getEnvWithDefaultString("DATA_DIR", index.DefaultDir), "set index directory to use") fs.StringVar(&rc.listen, "listen", ":6072", "listen on this address.") - fs.StringVar(&rc.hostname, "hostname", zoekt.HostnameBestEffort(), "the name we advertise to Sourcegraph when asking for the list of repositories to index. Can also be set via the NODE_NAME environment variable.") + fs.StringVar(&rc.hostname, "hostname", index.HostnameBestEffort(), "the name we advertise to Sourcegraph when asking for the list of repositories to index. 
Can also be set via the NODE_NAME environment variable.") fs.Float64Var(&rc.cpuFraction, "cpu_fraction", 1.0, "use this fraction of the cores for indexing.") fs.DurationVar(&rc.backoffDuration, "backoff_duration", getEnvWithDefaultDuration("BACKOFF_DURATION", 10*time.Minute), "for the given duration we backoff from enqueue operations for a repository that's failed its previous indexing attempt. Consecutive failures increase the duration of the delay linearly up to the maxBackoffDuration. A negative value disables indexing backoff.") fs.DurationVar(&rc.maxBackoffDuration, "max_backoff_duration", getEnvWithDefaultDuration("MAX_BACKOFF_DURATION", 120*time.Minute), "the maximum duration to backoff from enqueueing a repo for indexing. A negative value disables indexing backoff.") @@ -1288,7 +1390,7 @@ func startServer(conf rootConfig) error { } profiler.Init("zoekt-sourcegraph-indexserver") - setCompoundShardCounter(s.IndexDir) + setShardsCounter(s.IndexDir) if conf.listen != "" { @@ -1303,6 +1405,7 @@ func startServer(conf rootConfig) error { go func() { debugLog.Printf("serving HTTP on %s", conf.listen) + mux := grpcutil.MultiplexGRPC(newGRPCServer(sglog.Scoped("indexserver"), s), mux) log.Fatal(http.ListenAndServe(conf.listen, mux)) }() @@ -1455,10 +1558,7 @@ func newServer(conf rootConfig) (*Server, error) { } } - cpuCount := int(math.Round(float64(runtime.GOMAXPROCS(0)) * (conf.cpuFraction))) - if cpuCount < 1 { - cpuCount = 1 - } + cpuCount := max(int(math.Round(float64(runtime.GOMAXPROCS(0))*(conf.cpuFraction))), 1) if conf.indexConcurrency < 1 { conf.indexConcurrency = 1 @@ -1470,6 +1570,7 @@ func newServer(conf rootConfig) (*Server, error) { return &Server{ logger: logger, + rootURL: rootURL, Sourcegraph: sg, IndexDir: conf.index, IndexConcurrency: int(conf.indexConcurrency), @@ -1488,10 +1589,17 @@ func newServer(conf rootConfig) (*Server, error) { minSizeBytes: conf.minSize * 1024 * 1024, minAgeDays: conf.minAgeDays, }, - timeout: indexingTimeout, + timeout: 
indexingTimeout, + indexSemaphore: make(chan struct{}, int(conf.indexConcurrency)), }, err } +func newGRPCServer(logger sglog.Logger, s *Server, additionalOpts ...grpc.ServerOption) *grpc.Server { + grpcServer := defaults.NewServer(logger, additionalOpts...) + indexserverv1.RegisterSourcegraphIndexserverServiceServer(grpcServer, s) + return grpcServer +} + // defaultGRPCServiceConfigurationJSON is the default gRPC service configuration // for the indexed-search-configuration gRPC service. // @@ -1506,7 +1614,7 @@ func newServer(conf rootConfig) (*Server, error) { var defaultGRPCServiceConfigurationJSON string func internalActorUnaryInterceptor() grpc.UnaryClientInterceptor { - return func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { + return func(ctx context.Context, method string, req, reply any, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { ctx = metadata.AppendToOutgoingContext(ctx, "X-Sourcegraph-Actor-UID", "internal") return invoker(ctx, method, req, reply, cc, opts...) } @@ -1523,7 +1631,7 @@ func internalActorStreamInterceptor() grpc.StreamClientInterceptor { // This can be overridden by providing custom Server/Dial options. 
const defaultGRPCMessageReceiveSizeBytes = 90 * 1024 * 1024 // 90 MB -func dialGRPCClient(addr string, logger sglog.Logger, additionalOpts ...grpc.DialOption) (proto.ZoektConfigurationServiceClient, error) { +func dialGRPCClient(addr string, logger sglog.Logger, additionalOpts ...grpc.DialOption) (configv1.ZoektConfigurationServiceClient, error) { metrics := clientMetricsOnce() // If the service seems to be unavailable, this @@ -1574,7 +1682,7 @@ func dialGRPCClient(addr string, logger sglog.Logger, additionalOpts ...grpc.Dia return nil, fmt.Errorf("dialing %q: %w", addr, err) } - client := proto.NewZoektConfigurationServiceClient(cc) + client := configv1.NewZoektConfigurationServiceClient(cc) return client, nil } @@ -1626,8 +1734,8 @@ func cloneURL(u *url.URL) *url.URL { func main() { liblog := sglog.Init(sglog.Resource{ Name: "zoekt-indexserver", - Version: zoekt.Version, - InstanceID: zoekt.HostnameBestEffort(), + Version: index.Version, + InstanceID: index.HostnameBestEffort(), }) defer liblog.Sync() diff --git a/cmd/zoekt-sourcegraph-indexserver/main_test.go b/cmd/zoekt-sourcegraph-indexserver/main_test.go index e86bf953f..2fab54c68 100644 --- a/cmd/zoekt-sourcegraph-indexserver/main_test.go +++ b/cmd/zoekt-sourcegraph-indexserver/main_test.go @@ -9,20 +9,19 @@ import ( "net/url" "os" "path/filepath" - "sort" + "slices" "strings" "testing" + "github.com/google/go-cmp/cmp" sglog "github.com/sourcegraph/log" "github.com/sourcegraph/log/logtest" "github.com/stretchr/testify/require" "github.com/xeipuuv/gojsonschema" "google.golang.org/grpc" - "github.com/google/go-cmp/cmp" - "github.com/sourcegraph/zoekt" - proto "github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1" + configv1 "github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/grpc/protos/sourcegraph/zoekt/configuration/v1" "github.com/sourcegraph/zoekt/internal/tenant" ) @@ -45,7 +44,6 @@ func TestServer_defaultArgs(t *testing.T) { IndexDir: 
"/testdata/index", Parallelism: 6, Incremental: true, - FileLimit: 1 << 20, } got := s.indexArgs(IndexOptions{Name: "testName"}) if !cmp.Equal(got, want) { @@ -55,7 +53,7 @@ func TestServer_defaultArgs(t *testing.T) { func TestIndexNoTenant(t *testing.T) { s := &Server{} - _, err := s.Index(&indexArgs{}) + _, err := s.index(context.Background(), &indexArgs{}) require.ErrorIs(t, err, tenant.ErrMissingTenant) } @@ -150,18 +148,14 @@ func TestListRepoIDs(t *testing.T) { s := newSourcegraphClient(&testURL, testHostname, grpcClient, clientOptions...) listCalled := false - grpcClient.mockList = func(ctx context.Context, in *proto.ListRequest, opts ...grpc.CallOption) (*proto.ListResponse, error) { + grpcClient.mockList = func(ctx context.Context, in *configv1.ListRequest, opts ...grpc.CallOption) (*configv1.ListResponse, error) { listCalled = true gotRepoIDs := in.GetIndexedIds() - sort.Slice(gotRepoIDs, func(i, j int) bool { - return gotRepoIDs[i] < gotRepoIDs[j] - }) + slices.Sort(gotRepoIDs) wantRepoIDs := []int32{1, 3} - sort.Slice(wantRepoIDs, func(i, j int) bool { - return wantRepoIDs[i] < wantRepoIDs[j] - }) + slices.Sort(wantRepoIDs) if diff := cmp.Diff(wantRepoIDs, gotRepoIDs); diff != "" { t.Errorf("indexed repoIDs mismatch (-want +got):\n%s", diff) @@ -172,7 +166,7 @@ func TestListRepoIDs(t *testing.T) { t.Errorf("hostname mismatch (-want +got):\n%s", diff) } - return &proto.ListResponse{RepoIds: []int32{1, 2, 3}}, nil + return &configv1.ListResponse{RepoIds: []int32{1, 2, 3}}, nil } ctx := context.Background() @@ -186,14 +180,10 @@ func TestListRepoIDs(t *testing.T) { } receivedRepoIDs := got.IDs - sort.Slice(receivedRepoIDs, func(i, j int) bool { - return receivedRepoIDs[i] < receivedRepoIDs[j] - }) + slices.Sort(receivedRepoIDs) expectedRepoIDs := []uint32{1, 2, 3} - sort.Slice(expectedRepoIDs, func(i, j int) bool { - return expectedRepoIDs[i] < expectedRepoIDs[j] - }) + slices.Sort(expectedRepoIDs) if diff := cmp.Diff(expectedRepoIDs, receivedRepoIDs); 
diff != "" { t.Errorf("mismatch in list of all repoIDs (-want +got):\n%s", diff) @@ -227,7 +217,6 @@ func TestCreateEmptyShard(t *testing.T) { Incremental: true, IndexDir: dir, Parallelism: 1, - FileLimit: 1, } if err := createEmptyShard(args); err != nil { diff --git a/cmd/zoekt-sourcegraph-indexserver/merge.go b/cmd/zoekt-sourcegraph-indexserver/merge.go index ca70ddd21..69989193d 100644 --- a/cmd/zoekt-sourcegraph-indexserver/merge.go +++ b/cmd/zoekt-sourcegraph-indexserver/merge.go @@ -2,6 +2,7 @@ package main import ( "bytes" + "context" "os" "os/exec" "path/filepath" @@ -13,7 +14,8 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "go.uber.org/atomic" - "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" + "github.com/sourcegraph/zoekt/internal/tenant" ) var metricShardMergingRunning = promauto.NewGauge(prometheus.GaugeOpts{ @@ -46,6 +48,12 @@ func defaultMergeCmd(args ...string) *exec.Cmd { return cmd } +func defaultExplodeCmd(args ...string) *exec.Cmd { + cmd := exec.Command("zoekt-merge-index", "explode") + cmd.Args = append(cmd.Args, args...) + return cmd +} + // doMerge drives the merge process. It holds the lock on s.indexDir for the // duration of 1 merge, which might be several minutes, depending on the target // size of the compound shard. @@ -203,7 +211,7 @@ func isExcluded(path string, fi os.FileInfo, opts mergeOpts) bool { return true } - repos, _, err := zoekt.ReadMetadataPath(path) + repos, _, err := index.ReadMetadataPath(path) if err != nil { debugLog.Printf("failed to load metadata for %s\n", fi.Name()) return true @@ -234,3 +242,41 @@ func (c *compound) add(cand candidate) { c.shards = append(c.shards, cand) c.size += cand.sizeBytes } + +// explodeTenantCompoundShards explodes all compound shards that have repos from +// the tenant in question. The caller must hold the global lock. 
+func (s *Server) explodeTenantCompoundShards(ctx context.Context, explodeFunc func(path string) error) error { + tnt, err := tenant.FromContext(ctx) + if err != nil { + return err + } + + paths, err := filepath.Glob(filepath.Join(s.IndexDir, "compound-*")) + if err != nil { + return err + } + if len(paths) == 0 { + return nil + } + +nextCompoundShard: + for _, path := range paths { + // We don't use ReadMetadataPathAlive because we want to detect + // tombstoned repos, too. + repos, _, err := index.ReadMetadataPath(path) + if err != nil { + return err + } + for _, repo := range repos { + if repo.TenantID == tnt.ID() { + err := explodeFunc(path) + if err != nil { + return err + } + + continue nextCompoundShard + } + } + } + return nil +} diff --git a/cmd/zoekt-sourcegraph-indexserver/merge_test.go b/cmd/zoekt-sourcegraph-indexserver/merge_test.go index a7491d96b..c08e14b0c 100644 --- a/cmd/zoekt-sourcegraph-indexserver/merge_test.go +++ b/cmd/zoekt-sourcegraph-indexserver/merge_test.go @@ -10,8 +10,11 @@ import ( "strings" "testing" + "github.com/stretchr/testify/require" + "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" + "github.com/sourcegraph/zoekt/internal/tenant/tenanttest" ) func TestHasMultipleShards(t *testing.T) { @@ -48,12 +51,12 @@ func TestDoNotDeleteSingleShards(t *testing.T) { dir := t.TempDir() // Create a test shard. - opts := build.Options{ + opts := index.Options{ IndexDir: dir, RepositoryDescription: zoekt.Repository{Name: "test-repo"}, } opts.SetDefaults() - b, err := build.NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } @@ -198,6 +201,65 @@ func TestMerge(t *testing.T) { } } +func TestExplodeTenantCompoundShards(t *testing.T) { + tenanttest.MockEnforce(t) + dir := t.TempDir() + s := &Server{IndexDir: dir} + + // Create two compound shards: + // 1. One with repos from tenant 1 and 2 + // 2. 
One with repos from tenant 2 and 3 + cs1 := createCompoundShard(t, dir, []uint32{1, 2}, func(in *zoekt.Repository) { + if in.ID == 1 { + in.TenantID = 1 + } else { + in.TenantID = 2 + } + }) + + cs2 := createCompoundShard(t, dir, []uint32{3, 4}, func(in *zoekt.Repository) { + if in.ID == 3 { + in.TenantID = 2 + } else { + in.TenantID = 3 + } + }) + + // Create context with tenant 1 + ctx := tenanttest.NewTestContext() + + // Explode shards for tenant 1 + err := s.explodeTenantCompoundShards(ctx, func(path string) error { + // For this test we call explode directly instead of calling it in a + // separate process. + return index.Explode(dir, path) + }) + require.NoError(t, err) + + // Check that only cs1 was exploded (since it contained a repo from tenant + // 1) and cs2 remains untouched + require.NoFileExists(t, cs1) + require.FileExists(t, cs2) + + // Check that we have 2 simple shards (from cs1) and 1 compound shard (cs2) + simpleShards, err := filepath.Glob(filepath.Join(dir, "*_v16.00000.zoekt")) + require.NoError(t, err) + require.Len(t, simpleShards, 2, "expected 2 simple shards") + + // check that the simple shards are from tenant 1 and 2 + for _, shard := range simpleShards { + repos, _, err := index.ReadMetadataPath(shard) + require.NoError(t, err) + for _, repo := range repos { + require.Contains(t, []int{1, 2}, repo.TenantID, "expected tenant 1 or 2, but got %d", repo.TenantID) + } + } + + compoundShards, err := filepath.Glob(filepath.Join(dir, "compound-*")) + require.NoError(t, err) + require.Len(t, compoundShards, 1, "expected 1 compound shard") +} + func copyTestShards(dstDir string, srcShards []string) ([]string, error) { var tmpShards []string for _, s := range srcShards { diff --git a/cmd/zoekt-sourcegraph-indexserver/meta.go b/cmd/zoekt-sourcegraph-indexserver/meta.go index 52ede7ee4..ff4513101 100644 --- a/cmd/zoekt-sourcegraph-indexserver/meta.go +++ b/cmd/zoekt-sourcegraph-indexserver/meta.go @@ -6,8 +6,10 @@ import ( "os" "path/filepath" + 
"golang.org/x/sys/unix" + "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" ) // mergeMeta updates the .meta files for the shards on disk for o. @@ -16,10 +18,10 @@ import ( // failure. This means you might have an inconsistent state on disk if an // error is returned. It is recommended to fallback to re-indexing in that // case. -func mergeMeta(o *build.Options) error { +func mergeMeta(o *index.Options) error { todo := map[string]string{} for _, fn := range o.FindAllShards() { - repos, md, err := zoekt.ReadMetadataPath(fn) + repos, md, err := index.ReadMetadataPath(fn) if err != nil { return err } @@ -44,7 +46,7 @@ func mergeMeta(o *build.Options) error { continue } - var merged interface{} + var merged any if md.IndexFormatVersion >= 17 { merged = repos } else { @@ -81,7 +83,7 @@ func mergeMeta(o *build.Options) error { // // Note: .tmp is the same suffix used by Builder. indexserver knows to clean // them up. -func jsonMarshalTmpFile(v interface{}, p string) (_ string, err error) { +func jsonMarshalTmpFile(v any, p string) (_ string, err error) { b, err := json.Marshal(v) if err != nil { return "", err @@ -110,3 +112,8 @@ func jsonMarshalTmpFile(v interface{}, p string) (_ string, err error) { // respect process umask. build does this. 
var umask os.FileMode + +func init() { + umask = os.FileMode(unix.Umask(0)) + unix.Umask(int(umask)) +} diff --git a/cmd/zoekt-sourcegraph-indexserver/meta_test.go b/cmd/zoekt-sourcegraph-indexserver/meta_test.go index e36971980..c6106cb92 100644 --- a/cmd/zoekt-sourcegraph-indexserver/meta_test.go +++ b/cmd/zoekt-sourcegraph-indexserver/meta_test.go @@ -6,8 +6,9 @@ import ( "testing" "github.com/google/go-cmp/cmp" + "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" ) func TestMergeMeta(t *testing.T) { @@ -17,7 +18,7 @@ func TestMergeMeta(t *testing.T) { var repoFns []string for _, name := range repoNames { - opts := build.Options{ + opts := index.Options{ IndexDir: dir, RepositoryDescription: zoekt.Repository{ Name: name, @@ -27,7 +28,7 @@ func TestMergeMeta(t *testing.T) { }, } opts.SetDefaults() - b, err := build.NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } @@ -41,7 +42,7 @@ func TestMergeMeta(t *testing.T) { } // update meta on repo3 then test it changed - opts := &build.Options{ + opts := &index.Options{ IndexDir: dir, RepositoryDescription: zoekt.Repository{ Name: "repo3", @@ -54,7 +55,7 @@ func TestMergeMeta(t *testing.T) { if err := mergeMeta(opts); err != nil { t.Fatal(err) } - repos, _, _ := zoekt.ReadMetadataPath(repoFns[3]) + repos, _, _ := index.ReadMetadataPath(repoFns[3]) if got, want := repos[0].RawConfig["public"], "0"; got != want { t.Fatalf("failed to update metadata of repo3. 
Got public %q want %q", got, want) } @@ -72,7 +73,7 @@ func TestMergeMeta(t *testing.T) { readPublic := func() []string { var public []string - repos, _, _ := zoekt.ReadMetadataPath(dstFn) + repos, _, _ := index.ReadMetadataPath(dstFn) for _, r := range repos { public = append(public, r.RawConfig["public"]) } @@ -84,7 +85,7 @@ func TestMergeMeta(t *testing.T) { } // Update a repo1 in compound shard to be private - opts = &build.Options{ + opts = &index.Options{ IndexDir: dir, RepositoryDescription: zoekt.Repository{ Name: "repo1", @@ -105,7 +106,7 @@ func TestMergeMeta(t *testing.T) { func merge(t *testing.T, dstDir string, names []string) (string, string, error) { t.Helper() - var files []zoekt.IndexFile + var files []index.IndexFile for _, fn := range names { f, err := os.Open(fn) if err != nil { @@ -113,7 +114,7 @@ func merge(t *testing.T, dstDir string, names []string) (string, string, error) } defer f.Close() - indexFile, err := zoekt.NewIndexFile(f) + indexFile, err := index.NewIndexFile(f) if err != nil { return "", "", err } @@ -122,5 +123,5 @@ func merge(t *testing.T, dstDir string, names []string) (string, string, error) files = append(files, indexFile) } - return zoekt.Merge(dstDir, files...) + return index.Merge(dstDir, files...) 
} diff --git a/cmd/zoekt-sourcegraph-indexserver/meta_unix.go b/cmd/zoekt-sourcegraph-indexserver/meta_unix.go deleted file mode 100644 index 29fe8e507..000000000 --- a/cmd/zoekt-sourcegraph-indexserver/meta_unix.go +++ /dev/null @@ -1,14 +0,0 @@ -//go:build !windows - -package main - -import ( - "os" - - "golang.org/x/sys/unix" -) - -func init() { - umask = os.FileMode(unix.Umask(0)) - unix.Umask(int(umask)) -} diff --git a/cmd/zoekt-sourcegraph-indexserver/meta_windows.go b/cmd/zoekt-sourcegraph-indexserver/meta_windows.go deleted file mode 100644 index 3411af911..000000000 --- a/cmd/zoekt-sourcegraph-indexserver/meta_windows.go +++ /dev/null @@ -1,6 +0,0 @@ -package main - -func init() { - // no setting of file permissions on Windows - umask = 0 -} diff --git a/cmd/zoekt-sourcegraph-indexserver/owner.go b/cmd/zoekt-sourcegraph-indexserver/owner.go index def1f48a0..0020c0ab8 100644 --- a/cmd/zoekt-sourcegraph-indexserver/owner.go +++ b/cmd/zoekt-sourcegraph-indexserver/owner.go @@ -58,13 +58,13 @@ func (o *ownerChecker) Init() error { return err } - content := []byte(fmt.Sprintf(`DO NOT EDIT! generated by zoekt-sourcegraph-indexserver. + content := fmt.Appendf(nil, `DO NOT EDIT! generated by zoekt-sourcegraph-indexserver. This file records the identity of the owner of this zoekt index directory. If it changes, zoekt-sourcegraph-indexserver will exit with a non-zero exit code. This is to prevent multiple owners/writers. 
hostname=%s -`, o.Hostname)) +`, o.Hostname) // Always write out since we may update the comment if err := os.WriteFile(o.Path, content, 0o600); err != nil { diff --git a/cmd/zoekt-sourcegraph-indexserver/purge.go b/cmd/zoekt-sourcegraph-indexserver/purge.go new file mode 100644 index 000000000..7d5989488 --- /dev/null +++ b/cmd/zoekt-sourcegraph-indexserver/purge.go @@ -0,0 +1,73 @@ +package main + +import ( + "context" + "os" + "path/filepath" + "strings" + + "go.uber.org/multierr" + + "github.com/sourcegraph/zoekt/index" + "github.com/sourcegraph/zoekt/internal/tenant" +) + +// purgeTenantShards removes all simple shards from dir on a best-effort basis. +// It returns an error if there is no tenant in the context or if it encounters +// an error while removing a shard. +func purgeTenantShards(ctx context.Context, dir string) error { + tnt, err := tenant.FromContext(ctx) + if err != nil { + return err + } + + d, err := os.Open(dir) + if err != nil { + return err + } + defer d.Close() + + names, err := d.Readdirnames(-1) + if err != nil { + return err + } + + var merr error + for _, n := range names { + path := filepath.Join(dir, n) + fi, err := os.Stat(path) + if err != nil { + merr = multierr.Append(merr, err) + continue + } + if fi.IsDir() || filepath.Ext(path) != ".zoekt" { + continue + } + + // Skip compound shards. 
+ if strings.HasPrefix(filepath.Base(path), "compound-") { + continue + } + + repos, _, err := index.ReadMetadataPath(path) + if err != nil { + merr = multierr.Append(merr, err) + continue + } + // Since we excluded compound shards, we know there is exactly one repo + if repos[0].TenantID == tnt.ID() { + paths, err := index.IndexFilePaths(path) + if err != nil { + merr = multierr.Append(merr, err) + continue + } + for _, p := range paths { + if err := os.Remove(p); err != nil { + merr = multierr.Append(merr, err) + } + } + } + } + + return merr +} diff --git a/cmd/zoekt-sourcegraph-indexserver/purge_test.go b/cmd/zoekt-sourcegraph-indexserver/purge_test.go new file mode 100644 index 000000000..2450a8e13 --- /dev/null +++ b/cmd/zoekt-sourcegraph-indexserver/purge_test.go @@ -0,0 +1,102 @@ +package main + +import ( + "context" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/internal/tenant/tenanttest" +) + +func TestPurgeTenantShards(t *testing.T) { + // TestPurgeTenantShards verifies both the basic shard purging functionality + // and proper isolation between tenants. It ensures that: + // 1. Shards are only purged when a valid tenant context is provided + // 2. Only shards belonging to the specified tenant are purged + // 3. Compound shards are preserved regardless of tenant + // 4. 
Other tenants' shards remain untouched + dir := t.TempDir() + + // Create test shards for different tenants + tenant1Ctx := tenanttest.NewTestContext() + tenant2Ctx := tenanttest.NewTestContext() + + // Helper to set tenant ID for test shards + setTenantID := func(id int) func(in *zoekt.Repository) { + return func(in *zoekt.Repository) { + in.TenantID = id + } + } + + // Create test shards for tenant 1 + tenant1Shard1 := filepath.Join(dir, "tenant1_repo1.zoekt") + tenant1Shard2 := filepath.Join(dir, "tenant1_repo2.zoekt") + createTestShard(t, "tenant1_repo1", 1, tenant1Shard1, setTenantID(1)) + createTestShard(t, "tenant1_repo2", 2, tenant1Shard2, setTenantID(1)) + + // Create test shards for tenant 2 + tenant2Shard := filepath.Join(dir, "tenant2_repo1.zoekt") + createTestShard(t, "tenant2_repo1", 3, tenant2Shard, setTenantID(2)) + + // Create a compound shard (should be skipped) + compoundShard := filepath.Join(dir, "compound-1234.zoekt") + createTestShard(t, "compound_repo", 4, compoundShard, setTenantID(1)) + + // Test cases + tests := []struct { + name string + ctx context.Context + wantErr bool + check func(t *testing.T, dir string) + }{ + { + name: "no tenant in context", + ctx: context.Background(), + wantErr: true, + check: func(t *testing.T, dir string) { + // All files should still exist + require.FileExists(t, tenant1Shard1) + require.FileExists(t, tenant1Shard2) + require.FileExists(t, tenant2Shard) + require.FileExists(t, compoundShard) + }, + }, + { + name: "purge tenant 1 shards", + ctx: tenant1Ctx, + check: func(t *testing.T, dir string) { + // Tenant 1 shards should be deleted + require.NoFileExists(t, tenant1Shard1) + require.NoFileExists(t, tenant1Shard2) + // Other shards should still exist + require.FileExists(t, tenant2Shard) + require.FileExists(t, compoundShard) + }, + }, + { + name: "purge tenant 2 shards", + ctx: tenant2Ctx, + check: func(t *testing.T, dir string) { + // Tenant 2 shard should be deleted + require.NoFileExists(t, 
tenant2Shard) + // Compound shard should still exist + require.FileExists(t, compoundShard) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := purgeTenantShards(tt.ctx, dir) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + tt.check(t, dir) + }) + } +} diff --git a/cmd/zoekt-sourcegraph-indexserver/queue.go b/cmd/zoekt-sourcegraph-indexserver/queue.go index 50dd08805..a1ae2bbe7 100644 --- a/cmd/zoekt-sourcegraph-indexserver/queue.go +++ b/cmd/zoekt-sourcegraph-indexserver/queue.go @@ -389,14 +389,14 @@ func (pq pqueue) Swap(i, j int) { pq[j].heapIdx = j } -func (pq *pqueue) Push(x interface{}) { +func (pq *pqueue) Push(x any) { n := len(*pq) item := x.(*queueItem) item.heapIdx = n *pq = append(*pq, item) } -func (pq *pqueue) Pop() interface{} { +func (pq *pqueue) Pop() any { old := *pq n := len(old) item := old[n-1] diff --git a/cmd/zoekt-sourcegraph-indexserver/queue_test.go b/cmd/zoekt-sourcegraph-indexserver/queue_test.go index e26a14c6d..0d05625ec 100644 --- a/cmd/zoekt-sourcegraph-indexserver/queue_test.go +++ b/cmd/zoekt-sourcegraph-indexserver/queue_test.go @@ -12,6 +12,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/sourcegraph/log/logtest" + "github.com/sourcegraph/zoekt" ) @@ -19,7 +20,7 @@ func TestQueue(t *testing.T) { backoffDuration := 1 * time.Millisecond queue := NewQueue(backoffDuration, backoffDuration, logtest.Scoped(t)) - for i := 0; i < 100; i++ { + for i := range 100 { queue.AddOrUpdate(mkHEADIndexOptions(i, strconv.Itoa(i))) } @@ -64,7 +65,7 @@ func TestQueueFIFO(t *testing.T) { backoffDuration := 1 * time.Millisecond queue := NewQueue(backoffDuration, backoffDuration, logtest.Scoped(t)) - for i := 0; i < 100; i++ { + for i := range 100 { queue.AddOrUpdate(mkHEADIndexOptions(i, strconv.Itoa(i))) } @@ -189,7 +190,7 @@ func TestQueue_Integration_DebugQueue(t *testing.T) { // test: send a request to the queue's debug endpoint response, err := http.Get(server.URL) 
if err != nil { - t.Fatalf(err.Error()) + t.Fatalf("%s", err.Error()) } defer response.Body.Close() diff --git a/cmd/zoekt-sourcegraph-indexserver/sg.go b/cmd/zoekt-sourcegraph-indexserver/sg.go index 20f1b6ff9..da9b4e2ec 100644 --- a/cmd/zoekt-sourcegraph-indexserver/sg.go +++ b/cmd/zoekt-sourcegraph-indexserver/sg.go @@ -20,10 +20,8 @@ import ( "github.com/go-git/go-git/v5" "golang.org/x/net/trace" - proto "github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1" - "github.com/sourcegraph/zoekt/ctags" - "github.com/sourcegraph/zoekt" + configv1 "github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/grpc/protos/sourcegraph/zoekt/configuration/v1" ) // SourcegraphListResult is the return value of Sourcegraph.List. It is its @@ -83,7 +81,7 @@ func WithBatchSize(batchSize int) SourcegraphClientOption { } } -func newSourcegraphClient(rootURL *url.URL, hostname string, grpcClient proto.ZoektConfigurationServiceClient, opts ...SourcegraphClientOption) *sourcegraphClient { +func newSourcegraphClient(rootURL *url.URL, hostname string, grpcClient configv1.ZoektConfigurationServiceClient, opts ...SourcegraphClientOption) *sourcegraphClient { client := &sourcegraphClient{ Root: rootURL, Hostname: hostname, @@ -113,7 +111,7 @@ type sourcegraphClient struct { BatchSize int // grpcClient is used to make requests to the Sourcegraph instance if gRPC is enabled. - grpcClient proto.ZoektConfigurationServiceClient + grpcClient configv1.ZoektConfigurationServiceClient // configFingerprintProto is the last config fingerprint (as GRPC) returned from // Sourcegraph. It can be used for future calls to the configuration @@ -121,7 +119,7 @@ type sourcegraphClient struct { // // configFingerprintProto is mutually exclusive with configFingerprint - this field // will only be used if gRPC is enabled. 
- configFingerprintProto *proto.Fingerprint + configFingerprintProto *configv1.Fingerprint // configFingerprintReset tracks when we should zero out the // configFingerprint. We want to periodically do this just in case our @@ -269,65 +267,32 @@ type indexOptionsItem struct { Error string } -func (o *indexOptionsItem) FromProto(x *proto.ZoektIndexOptions) { - branches := make([]zoekt.RepositoryBranch, 0, len(x.Branches)) - for _, b := range x.GetBranches() { - branches = append(branches, zoekt.RepositoryBranch{ - Name: b.GetName(), - Version: b.GetVersion(), - }) - } - +func (o *indexOptionsItem) FromProto(x *configv1.ZoektIndexOptions) { item := indexOptionsItem{} - languageMap := make(map[string]ctags.CTagsParserType) - - for _, lang := range x.GetLanguageMap() { - languageMap[lang.GetLanguage()] = ctags.CTagsParserType(lang.GetCtags().Number()) - } - - item.IndexOptions = IndexOptions{ - RepoID: uint32(x.GetRepoId()), - LargeFiles: x.GetLargeFiles(), - Symbols: x.GetSymbols(), - Branches: branches, - Name: x.GetName(), - - Priority: x.GetPriority(), - - Public: x.GetPublic(), - Fork: x.GetFork(), - Archived: x.GetArchived(), - - LanguageMap: languageMap, - ShardConcurrency: x.GetShardConcurrency(), - - TenantID: int(x.TenantId), - } - + item.IndexOptions.FromProto(x) item.Error = x.GetError() - *o = item } -func (o *indexOptionsItem) ToProto() *proto.ZoektIndexOptions { - branches := make([]*proto.ZoektRepositoryBranch, 0, len(o.Branches)) +func (o *indexOptionsItem) ToProto() *configv1.ZoektIndexOptions { + branches := make([]*configv1.ZoektRepositoryBranch, 0, len(o.Branches)) for _, b := range o.Branches { - branches = append(branches, &proto.ZoektRepositoryBranch{ + branches = append(branches, &configv1.ZoektRepositoryBranch{ Name: b.Name, Version: b.Version, }) } - languageMap := make([]*proto.LanguageMapping, 0, len(o.LanguageMap)) + languageMap := make([]*configv1.LanguageMapping, 0, len(o.LanguageMap)) for lang, parser := range o.LanguageMap { - 
languageMap = append(languageMap, &proto.LanguageMapping{ + languageMap = append(languageMap, &configv1.LanguageMapping{ Language: lang, - Ctags: proto.CTagsParserType(parser), + Ctags: configv1.CTagsParserType(parser), }) } - return &proto.ZoektIndexOptions{ + return &configv1.ZoektIndexOptions{ RepoId: int32(o.RepoID), LargeFiles: o.LargeFiles, Symbols: o.Symbols, @@ -349,13 +314,13 @@ func (o *indexOptionsItem) ToProto() *proto.ZoektIndexOptions { } } -func (s *sourcegraphClient) getIndexOptions(ctx context.Context, fingerprint *proto.Fingerprint, repos []uint32) ([]indexOptionsItem, *proto.Fingerprint, error) { +func (s *sourcegraphClient) getIndexOptions(ctx context.Context, fingerprint *configv1.Fingerprint, repos []uint32) ([]indexOptionsItem, *configv1.Fingerprint, error) { repoIDs := make([]int32, 0, len(repos)) for _, id := range repos { repoIDs = append(repoIDs, int32(id)) } - req := proto.SearchConfigurationRequest{ + req := configv1.SearchConfigurationRequest{ RepoIds: repoIDs, Fingerprint: fingerprint, } @@ -383,7 +348,7 @@ func (s *sourcegraphClient) getCloneURL(name string) string { } func (s *sourcegraphClient) listRepoIDs(ctx context.Context, indexed []uint32) ([]uint32, error) { - var request proto.ListRequest + var request configv1.ListRequest request.Hostname = s.Hostname request.IndexedIds = make([]int32, 0, len(indexed)) for _, id := range indexed { @@ -413,32 +378,32 @@ type updateIndexStatusRequest struct { Repositories []indexStatus } -func (u *updateIndexStatusRequest) ToProto() *proto.UpdateIndexStatusRequest { - repositories := make([]*proto.UpdateIndexStatusRequest_Repository, 0, len(u.Repositories)) +func (u *updateIndexStatusRequest) ToProto() *configv1.UpdateIndexStatusRequest { + repositories := make([]*configv1.UpdateIndexStatusRequest_Repository, 0, len(u.Repositories)) for _, repo := range u.Repositories { - branches := make([]*proto.ZoektRepositoryBranch, 0, len(repo.Branches)) + branches := 
make([]*configv1.ZoektRepositoryBranch, 0, len(repo.Branches)) for _, branch := range repo.Branches { - branches = append(branches, &proto.ZoektRepositoryBranch{ + branches = append(branches, &configv1.ZoektRepositoryBranch{ Name: branch.Name, Version: branch.Version, }) } - repositories = append(repositories, &proto.UpdateIndexStatusRequest_Repository{ + repositories = append(repositories, &configv1.UpdateIndexStatusRequest_Repository{ RepoId: repo.RepoID, Branches: branches, IndexTimeUnix: repo.IndexTimeUnix, }) } - return &proto.UpdateIndexStatusRequest{ + return &configv1.UpdateIndexStatusRequest{ Repositories: repositories, } } -func (u *updateIndexStatusRequest) FromProto(x *proto.UpdateIndexStatusRequest) { +func (u *updateIndexStatusRequest) FromProto(x *configv1.UpdateIndexStatusRequest) { protoRepositories := x.GetRepositories() repositories := make([]indexStatus, 0, len(protoRepositories)) diff --git a/cmd/zoekt-test/main.go b/cmd/zoekt-test/main.go index 6dee60297..68b836ae0 100644 --- a/cmd/zoekt-test/main.go +++ b/cmd/zoekt-test/main.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// zoekt-test compares the search engine results with raw substring search +// Command zoekt-test compares the zoekt results with raw substring search. 
package main import ( @@ -32,9 +32,9 @@ import ( "time" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/query" - "github.com/sourcegraph/zoekt/shards" + "github.com/sourcegraph/zoekt/search" ) func readTree(dir string) (map[string][]byte, error) { @@ -72,7 +72,7 @@ func compare(dir, patfile string, caseSensitive bool) error { } defer os.RemoveAll(indexDir) - var opts build.Options + var opts index.Options opts.SetDefaults() opts.IndexDir = indexDir @@ -84,7 +84,7 @@ func compare(dir, patfile string, caseSensitive bool) error { return fmt.Errorf("no contents") } - builder, err := build.NewBuilder(opts) + builder, err := index.NewBuilder(opts) if err != nil { return err } @@ -107,7 +107,7 @@ func compare(dir, patfile string, caseSensitive bool) error { if err != nil { return err } - searcher, err := shards.NewDirectorySearcher(indexDir) + searcher, err := search.NewDirectorySearcher(indexDir) if err != nil { return err } @@ -170,8 +170,8 @@ func compare(dir, patfile string, caseSensitive bool) error { } var ( - memprofile = flag.String("memprofile", "", "write memory profile to `file`") - cpuprofile = flag.String("cpuprofile", "", "write memory profile to `file`") + memprofile = flag.String("mem_profile", "", "write memory profile to `file`") + cpuprofile = flag.String("cpu_profile", "", "write cpu profile to `file`") ) func testLoadIndexDir(indexDir string) { @@ -179,7 +179,7 @@ func testLoadIndexDir(indexDir string) { runtime.GC() runtime.ReadMemStats(&a) start := time.Now() - s, err := shards.NewDirectorySearcher(indexDir) + s, err := search.NewDirectorySearcher(indexDir) if err != nil { return } diff --git a/cmd/zoekt-webserver/grpc/server/sampling_test.go b/cmd/zoekt-webserver/grpc/server/sampling_test.go index 3d06c4be6..3ac29326b 100644 --- a/cmd/zoekt-webserver/grpc/server/sampling_test.go +++ b/cmd/zoekt-webserver/grpc/server/sampling_test.go @@ -16,7 +16,7 @@ func 
TestSamplingStream(t *testing.T) { } fileEvents := func(n int) []*zoekt.SearchResult { res := make([]*zoekt.SearchResult, n) - for i := 0; i < n; i++ { + for i := range n { res[i] = filesEvent } return res @@ -26,7 +26,7 @@ func TestSamplingStream(t *testing.T) { } statsEvents := func(n int) []*zoekt.SearchResult { res := make([]*zoekt.SearchResult, n) - for i := 0; i < n; i++ { + for i := range n { res[i] = statsEvent } return res diff --git a/cmd/zoekt-webserver/grpc/server/server.go b/cmd/zoekt-webserver/grpc/server/server.go index 812b8a2f9..813b29c17 100644 --- a/cmd/zoekt-webserver/grpc/server/server.go +++ b/cmd/zoekt-webserver/grpc/server/server.go @@ -4,12 +4,12 @@ import ( "context" "math" - "github.com/sourcegraph/zoekt/grpc/chunk" - proto "github.com/sourcegraph/zoekt/grpc/protos/zoekt/webserver/v1" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/grpc/chunk" + webserverv1 "github.com/sourcegraph/zoekt/grpc/protos/zoekt/webserver/v1" "github.com/sourcegraph/zoekt/query" ) @@ -20,11 +20,11 @@ func NewServer(s zoekt.Streamer) *Server { } type Server struct { - proto.UnimplementedWebserverServiceServer + webserverv1.UnimplementedWebserverServiceServer streamer zoekt.Streamer } -func (s *Server) Search(ctx context.Context, req *proto.SearchRequest) (*proto.SearchResponse, error) { +func (s *Server) Search(ctx context.Context, req *webserverv1.SearchRequest) (*webserverv1.SearchResponse, error) { q, err := query.QFromProto(req.GetQuery()) if err != nil { return nil, status.Error(codes.InvalidArgument, err.Error()) @@ -38,7 +38,7 @@ func (s *Server) Search(ctx context.Context, req *proto.SearchRequest) (*proto.S return res.ToProto(), nil } -func (s *Server) StreamSearch(req *proto.StreamSearchRequest, ss proto.WebserverService_StreamSearchServer) error { +func (s *Server) StreamSearch(req *webserverv1.StreamSearchRequest, ss webserverv1.WebserverService_StreamSearchServer) error { 
request := req.GetRequest() q, err := query.QFromProto(request.GetQuery()) @@ -56,7 +56,7 @@ func (s *Server) StreamSearch(req *proto.StreamSearchRequest, ss proto.Webserver return err } -func (s *Server) List(ctx context.Context, req *proto.ListRequest) (*proto.ListResponse, error) { +func (s *Server) List(ctx context.Context, req *webserverv1.ListRequest) (*webserverv1.ListResponse, error) { q, err := query.QFromProto(req.GetQuery()) if err != nil { return nil, status.Error(codes.InvalidArgument, err.Error()) @@ -71,12 +71,12 @@ func (s *Server) List(ctx context.Context, req *proto.ListRequest) (*proto.ListR } // gRPCChunkSender is a zoekt.Sender that sends small chunks of FileMatches to the provided gRPC stream. -func gRPCChunkSender(ss proto.WebserverService_StreamSearchServer) zoekt.Sender { +func gRPCChunkSender(ss webserverv1.WebserverService_StreamSearchServer) zoekt.Sender { f := func(r *zoekt.SearchResult) { result := r.ToStreamProto().GetResponseChunk() if len(result.GetFiles()) == 0 { // stats-only result, send it immediately - _ = ss.Send(&proto.StreamSearchResponse{ + _ = ss.Send(&webserverv1.StreamSearchResponse{ ResponseChunk: result, }) return @@ -87,10 +87,10 @@ func gRPCChunkSender(ss proto.WebserverService_StreamSearchServer) zoekt.Sender statsSent := false numFilesSent := 0 - sendFunc := func(filesChunk []*proto.FileMatch) error { + sendFunc := func(filesChunk []*webserverv1.FileMatch) error { numFilesSent += len(filesChunk) - var stats *proto.Stats + var stats *webserverv1.Stats if !statsSent { // We only send stats back on the first chunk statsSent = true stats = result.GetStats() @@ -99,7 +99,7 @@ func gRPCChunkSender(ss proto.WebserverService_StreamSearchServer) zoekt.Sender progress := result.GetProgress() if numFilesSent < len(result.GetFiles()) { // more chunks to come - progress = &proto.Progress{ + progress = &webserverv1.Progress{ Priority: result.GetProgress().GetPriority(), // We want the client to consume the entire set of chunks - 
so we manually @@ -111,8 +111,8 @@ func gRPCChunkSender(ss proto.WebserverService_StreamSearchServer) zoekt.Sender } } - return ss.Send(&proto.StreamSearchResponse{ - ResponseChunk: &proto.SearchResponse{ + return ss.Send(&webserverv1.StreamSearchResponse{ + ResponseChunk: &webserverv1.SearchResponse{ Files: filesChunk, Stats: stats, diff --git a/cmd/zoekt-webserver/grpc/server/server_test.go b/cmd/zoekt-webserver/grpc/server/server_test.go index eae99d60d..7f2cbe135 100644 --- a/cmd/zoekt-webserver/grpc/server/server_test.go +++ b/cmd/zoekt-webserver/grpc/server/server_test.go @@ -12,7 +12,6 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" - "github.com/sourcegraph/zoekt/grpc/protos/zoekt/webserver/v1" "go.uber.org/atomic" "golang.org/x/net/http2" "golang.org/x/net/http2/h2c" @@ -22,6 +21,7 @@ import ( "google.golang.org/protobuf/testing/protocmp" "github.com/sourcegraph/zoekt" + webserverv1 "github.com/sourcegraph/zoekt/grpc/protos/zoekt/webserver/v1" "github.com/sourcegraph/zoekt/internal/mockSearcher" "github.com/sourcegraph/zoekt/query" ) @@ -52,7 +52,7 @@ func TestClientServer(t *testing.T) { gs := grpc.NewServer() defer gs.Stop() - v1.RegisterWebserverServiceServer(gs, NewServer(adapter{mock})) + webserverv1.RegisterWebserverServiceServer(gs, NewServer(adapter{mock})) ts := httptest.NewServer(h2c.NewHandler(gs, &http2.Server{})) defer ts.Close() @@ -66,9 +66,9 @@ func TestClientServer(t *testing.T) { } defer cc.Close() - client := v1.NewWebserverServiceClient(cc) + client := webserverv1.NewWebserverServiceClient(cc) - r, err := client.Search(context.Background(), &v1.SearchRequest{Query: query.QToProto(mock.WantSearch)}) + r, err := client.Search(context.Background(), &webserverv1.SearchRequest{Query: query.QToProto(mock.WantSearch)}) if err != nil { t.Fatal(err) } @@ -76,7 +76,7 @@ func TestClientServer(t *testing.T) { t.Fatalf("got %+v, want %+v", r, mock.SearchResult.ToProto()) } - l, err := 
client.List(context.Background(), &v1.ListRequest{Query: query.QToProto(mock.WantList)}) + l, err := client.List(context.Background(), &webserverv1.ListRequest{Query: query.QToProto(mock.WantList)}) if err != nil { t.Fatal(err) } @@ -85,8 +85,8 @@ func TestClientServer(t *testing.T) { t.Fatalf("got %+v, want %+v", l, mock.RepoList.ToProto()) } - request := v1.StreamSearchRequest{ - Request: &v1.SearchRequest{Query: query.QToProto(mock.WantSearch)}, + request := webserverv1.StreamSearchRequest{ + Request: &webserverv1.SearchRequest{Query: query.QToProto(mock.WantSearch)}, } cs, err := client.StreamSearch(context.Background(), &request) @@ -97,7 +97,7 @@ func TestClientServer(t *testing.T) { allResponses := readAllStream(t, cs) // check to make sure that we get the same set of file matches back - var receivedFileMatches []*v1.FileMatch + var receivedFileMatches []*webserverv1.FileMatch for _, r := range allResponses { receivedFileMatches = append(receivedFileMatches, r.GetFiles()...) } @@ -145,7 +145,7 @@ func TestFuzzGRPCChunkSender(t *testing.T) { // Safety, ensure that all other fields are echoed back correctly if the schema ever changes opts := []cmp.Option{ protocmp.Transform(), - protocmp.IgnoreFields(&v1.SearchResponse{}, + protocmp.IgnoreFields(&webserverv1.SearchResponse{}, "progress", // progress is tested above "stats", // aggregated stats are tested below "files", // files are tested separately @@ -159,7 +159,7 @@ func TestFuzzGRPCChunkSender(t *testing.T) { receivedStats := &zoekt.Stats{} - var receivedFileMatches []*v1.FileMatch + var receivedFileMatches []*webserverv1.FileMatch for _, r := range allResponses { receivedStats.Add(zoekt.StatsFromProto(r.GetStats())) receivedFileMatches = append(receivedFileMatches, r.GetFiles()...) 
@@ -168,7 +168,7 @@ func TestFuzzGRPCChunkSender(t *testing.T) { // Check to make sure that we get one set of stats back if diff := cmp.Diff(expectedResult.GetStats(), receivedStats.ToProto(), protocmp.Transform(), - protocmp.IgnoreFields(&v1.Stats{}, + protocmp.IgnoreFields(&webserverv1.Stats{}, "duration", // for whatever the duration field isn't updated when zoekt.Stats.Add is called ), ); diff != "" { @@ -195,7 +195,7 @@ func TestFuzzGRPCChunkSender(t *testing.T) { } // newPairedSearchStream returns a pair of client and server search streams that are connected to each other. -func newPairedSearchStream(t *testing.T) (v1.WebserverService_StreamSearchClient, v1.WebserverService_StreamSearchServer) { +func newPairedSearchStream(t *testing.T) (webserverv1.WebserverService_StreamSearchClient, webserverv1.WebserverService_StreamSearchServer) { client := &mockSearchStreamClient{t: t} server := &mockSearchStreamServer{t: t, pairedClient: client} @@ -205,7 +205,7 @@ func newPairedSearchStream(t *testing.T) (v1.WebserverService_StreamSearchClient type mockSearchStreamClient struct { t *testing.T - storedResponses []*v1.StreamSearchResponse + storedResponses []*webserverv1.StreamSearchResponse index int startedReading atomic.Bool @@ -213,7 +213,7 @@ type mockSearchStreamClient struct { grpc.ClientStream } -func (m *mockSearchStreamClient) Recv() (*v1.StreamSearchResponse, error) { +func (m *mockSearchStreamClient) Recv() (*webserverv1.StreamSearchResponse, error) { m.startedReading.Store(true) if m.index >= len(m.storedResponses) { @@ -225,7 +225,7 @@ func (m *mockSearchStreamClient) Recv() (*v1.StreamSearchResponse, error) { return r, nil } -func (m *mockSearchStreamClient) storeResponse(r *v1.StreamSearchResponse) { +func (m *mockSearchStreamClient) storeResponse(r *webserverv1.StreamSearchResponse) { if m.startedReading.Load() { m.t.Fatalf("cannot store additional responses after starting to read from stream") } @@ -241,18 +241,18 @@ type mockSearchStreamServer struct 
{ grpc.ServerStream } -func (m *mockSearchStreamServer) Send(r *v1.StreamSearchResponse) error { +func (m *mockSearchStreamServer) Send(r *webserverv1.StreamSearchResponse) error { m.pairedClient.storeResponse(r) return nil } var ( - _ v1.WebserverService_StreamSearchServer = &mockSearchStreamServer{} - _ v1.WebserverService_StreamSearchClient = &mockSearchStreamClient{} + _ webserverv1.WebserverService_StreamSearchServer = &mockSearchStreamServer{} + _ webserverv1.WebserverService_StreamSearchClient = &mockSearchStreamClient{} ) -func readAllStream(t *testing.T, cs v1.WebserverService_StreamSearchClient) []*v1.SearchResponse { - var got []*v1.SearchResponse +func readAllStream(t *testing.T, cs webserverv1.WebserverService_StreamSearchClient) []*webserverv1.SearchResponse { + var got []*webserverv1.SearchResponse for { // collect all responses from the stream r, err := cs.Recv() if errors.Is(err, io.EOF) { diff --git a/cmd/zoekt-webserver/main.go b/cmd/zoekt-webserver/main.go index 2ba35f002..d9e5c5773 100644 --- a/cmd/zoekt-webserver/main.go +++ b/cmd/zoekt-webserver/main.go @@ -12,9 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Command zoekt-webserver responds to search queries, using an index generated -// by another program such as zoekt-indexserver. - +// Command zoekt-webserver starts a server that responds to search queries, using +// an index generated by another program such as zoekt-indexserver. 
package main import ( @@ -33,43 +32,35 @@ import ( "os" "os/signal" "path/filepath" - "runtime" "strconv" "strings" - "sync" "time" - grpcprom "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus" - "github.com/sourcegraph/mountinfo" - "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" - "golang.org/x/net/http2" - "golang.org/x/net/http2/h2c" - "google.golang.org/grpc" - - "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" - zoektgrpc "github.com/sourcegraph/zoekt/cmd/zoekt-webserver/grpc/server" - "github.com/sourcegraph/zoekt/debugserver" - "github.com/sourcegraph/zoekt/grpc/internalerrs" - "github.com/sourcegraph/zoekt/grpc/messagesize" - "github.com/sourcegraph/zoekt/grpc/propagator" - proto "github.com/sourcegraph/zoekt/grpc/protos/zoekt/webserver/v1" - "github.com/sourcegraph/zoekt/internal/profiler" - "github.com/sourcegraph/zoekt/internal/tenant" - "github.com/sourcegraph/zoekt/internal/tracer" - "github.com/sourcegraph/zoekt/query" - "github.com/sourcegraph/zoekt/shards" - "github.com/sourcegraph/zoekt/trace" - "github.com/sourcegraph/zoekt/web" - "github.com/opentracing/opentracing-go" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/shirou/gopsutil/v3/disk" sglog "github.com/sourcegraph/log" + "github.com/sourcegraph/mountinfo" "github.com/uber/jaeger-client-go" oteltrace "go.opentelemetry.io/otel/trace" "go.uber.org/automaxprocs/maxprocs" + "golang.org/x/sys/unix" + "google.golang.org/grpc" + + "github.com/sourcegraph/zoekt" + grpcserver "github.com/sourcegraph/zoekt/cmd/zoekt-webserver/grpc/server" + "github.com/sourcegraph/zoekt/grpc/defaults" + "github.com/sourcegraph/zoekt/grpc/grpcutil" + webserverv1 "github.com/sourcegraph/zoekt/grpc/protos/zoekt/webserver/v1" + "github.com/sourcegraph/zoekt/index" + "github.com/sourcegraph/zoekt/internal/debugserver" + "github.com/sourcegraph/zoekt/internal/profiler" + 
"github.com/sourcegraph/zoekt/internal/trace" + "github.com/sourcegraph/zoekt/internal/tracer" + "github.com/sourcegraph/zoekt/query" + "github.com/sourcegraph/zoekt/search" + "github.com/sourcegraph/zoekt/web" ) const logFormat = "2006-01-02T15-04-05.999999999Z07" @@ -140,7 +131,7 @@ func main() { logRefresh := flag.Duration("log_refresh", 24*time.Hour, "if using --log_dir, start writing a new file this often.") listen := flag.String("listen", ":6070", "listen on this address.") - index := flag.String("index", build.DefaultDir, "set index directory to use") + indexDir := flag.String("index", index.DefaultDir, "set index directory to use") html := flag.Bool("html", true, "enable HTML interface") enableRPC := flag.Bool("rpc", false, "enable go/net RPC") enableIndexserverProxy := flag.Bool("indexserver_proxy", false, "proxy requests with URLs matching the path /indexserver/ to /indexserver.sock") @@ -154,12 +145,13 @@ func main() { templateDir := flag.String("template_dir", "", "set directory from which to load custom .html.tpl template files") dumpTemplates := flag.Bool("dump_templates", false, "dump templates into --template_dir and exit.") + corsOrigin := flag.String("cors_origin", "", "allow requests from this origin. If empty, no CORS headers are set.") version := flag.Bool("version", false, "Print version number") flag.Parse() if *version { - fmt.Printf("zoekt-webserver version %q\n", zoekt.Version) + fmt.Printf("zoekt-webserver version %q\n", index.Version) os.Exit(0) } @@ -172,8 +164,8 @@ func main() { resource := sglog.Resource{ Name: "zoekt-webserver", - Version: zoekt.Version, - InstanceID: zoekt.HostnameBestEffort(), + Version: index.Version, + InstanceID: index.HostnameBestEffort(), } liblog := sglog.Init(resource) @@ -194,25 +186,25 @@ func main() { // Tune GOMAXPROCS to match Linux container CPU quota. 
_, _ = maxprocs.Set() - if err := os.MkdirAll(*index, 0o755); err != nil { + if err := os.MkdirAll(*indexDir, 0o755); err != nil { log.Fatal(err) } - mustRegisterDiskMonitor(*index) + mustRegisterDiskMonitor(*indexDir) metricsLogger := sglog.Scoped("metricsRegistration") mustRegisterMemoryMapMetrics(metricsLogger) opts := mountinfo.CollectorOpts{Namespace: "zoekt_webserver"} - c := mountinfo.NewCollector(metricsLogger, opts, map[string]string{"indexDir": *index}) + c := mountinfo.NewCollector(metricsLogger, opts, map[string]string{"indexDir": *indexDir}) prometheus.DefaultRegisterer.MustRegister(c) // Do not block on loading shards so we can become partially available // sooner. Otherwise on large instances zoekt can be unavailable on the // order of minutes. - searcher, err := shards.NewDirectorySearcherFast(*index) + searcher, err := search.NewDirectorySearcherFast(*indexDir) if err != nil { log.Fatal(err) } @@ -225,7 +217,7 @@ func main() { s := &web.Server{ Searcher: searcher, Top: web.Top, - Version: zoekt.Version, + Version: index.Version, } if *templateDir != "" { @@ -261,7 +253,7 @@ func main() { debugserver.AddHandlers(serveMux, *enablePprof) if *enableIndexserverProxy { - socket := filepath.Join(*index, "indexserver.sock") + socket := filepath.Join(*indexDir, "indexserver.sock") sglog.Scoped("server").Info("adding reverse proxy", sglog.String("socket", socket)) addProxyHandler(serveMux, socket) } @@ -299,7 +291,8 @@ func main() { streamer := web.NewTraceAwareSearcher(s.Searcher) grpcServer := newGRPCServer(logger, streamer) - handler = multiplexGRPC(grpcServer, handler) + handler = grpcutil.MultiplexGRPC(grpcServer, handler) + handler = corsHandler(handler, *corsOrigin) srv := &http.Server{ Addr: *listen, @@ -335,24 +328,6 @@ func main() { } } -// multiplexGRPC takes a gRPC server and a plain HTTP handler and multiplexes the -// request handling. 
Any requests that declare themselves as gRPC requests are routed -// to the gRPC server, all others are routed to the httpHandler. -func multiplexGRPC(grpcServer *grpc.Server, httpHandler http.Handler) http.Handler { - newHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if r.ProtoMajor == 2 && strings.Contains(r.Header.Get("Content-Type"), "application/grpc") { - grpcServer.ServeHTTP(w, r) - } else { - httpHandler.ServeHTTP(w, r) - } - }) - - // Until we enable TLS, we need to fall back to the h2c protocol, which is - // basically HTTP2 without TLS. The standard library does not implement the - // h2s protocol, so this hijacks h2s requests and handles them correctly. - return h2c.NewHandler(newHandler, &http2.Server{}) -} - // addProxyHandler adds a handler to "mux" that proxies all requests with base // /indexserver to "socket". func addProxyHandler(mux *http.ServeMux, socket string) { @@ -376,8 +351,8 @@ func addProxyHandler(mux *http.ServeMux, socket string) { // times you will read the channel (used as buffer for signal.Notify). 
func shutdownSignalChan(maxReads int) <-chan os.Signal { c := make(chan os.Signal, maxReads) - signal.Notify(c, os.Interrupt) // terminal C-c and goreman - signal.Notify(c, PLATFORM_SIGTERM) // Kubernetes + signal.Notify(c, os.Interrupt) // terminal C-c and goreman + signal.Notify(c, unix.SIGTERM) // Kubernetes return c } @@ -486,28 +461,13 @@ Possible remediations: } } -func diskUsage(path string) (*disk.UsageStat, error) { - duPath := path - if runtime.GOOS == "windows" { - duPath = filepath.VolumeName(duPath) - } - usage, err := disk.Usage(duPath) - if err != nil { - return nil, fmt.Errorf("diskUsage: %w", err) - } - return usage, err -} - func mustRegisterDiskMonitor(path string) { prometheus.MustRegister(prometheus.NewGaugeFunc(prometheus.GaugeOpts{ Name: "src_disk_space_available_bytes", Help: "Amount of free space disk space.", ConstLabels: prometheus.Labels{"path": path}, }, func() float64 { - // I know there is no error handling here, and I don't like it - // but there was no error handling in the previous version - // that used Statfs, either, so I'm assuming there's no need for it - usage, _ := diskUsage(path) + usage, _ := disk.Usage(path) return float64(usage.Free) })) @@ -516,10 +476,7 @@ func mustRegisterDiskMonitor(path string) { Help: "Amount of total disk space.", ConstLabels: prometheus.Labels{"path": path}, }, func() float64 { - // I know there is no error handling here, and I don't like it - // but there was no error handling in the previous version - // that used Statfs, either, so I'm assuming there's no need for it - usage, _ := diskUsage(path) + usage, _ := disk.Usage(path) return float64(usage.Total) })) } @@ -641,41 +598,39 @@ func traceContext(ctx context.Context) sglog.TraceContext { } func newGRPCServer(logger sglog.Logger, streamer zoekt.Streamer, additionalOpts ...grpc.ServerOption) *grpc.Server { - metrics := serverMetricsOnce() - - opts := []grpc.ServerOption{ - grpc.StatsHandler(otelgrpc.NewServerHandler()), - 
grpc.ChainStreamInterceptor( - propagator.StreamServerPropagator(tenant.Propagator{}), - tenant.StreamServerInterceptor, - metrics.StreamServerInterceptor(), - messagesize.StreamServerInterceptor, - internalerrs.LoggingStreamServerInterceptor(logger), - ), - grpc.ChainUnaryInterceptor( - propagator.UnaryServerPropagator(tenant.Propagator{}), - tenant.UnaryServerInterceptor, - metrics.UnaryServerInterceptor(), - messagesize.UnaryServerInterceptor, - internalerrs.LoggingUnaryServerInterceptor(logger), - ), - } - - opts = append(opts, additionalOpts...) - - // Ensure that the message size options are set last, so they override any other - // server-specific options that tweak the message size. - // - // The message size options are only provided if the environment variable is set. These options serve as an escape hatch, so they - // take precedence over everything else with a uniform size setting that's easy to reason about. - opts = append(opts, messagesize.MustGetServerMessageSizeFromEnv()...) - - s := grpc.NewServer(opts...) - proto.RegisterWebserverServiceServer(s, zoektgrpc.NewServer(streamer)) + s := defaults.NewServer(logger, additionalOpts...) 
+ webserverv1.RegisterWebserverServiceServer(s, grpcserver.NewServer(streamer)) return s } +func corsHandler(h http.Handler, origin string) http.Handler { + if origin == "" { + return h + } + + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Header.Get("Origin") == "" { + h.ServeHTTP(w, r) + return + } + + w.Header().Set("Access-Control-Allow-Origin", origin) + w.Header().Set("Access-Control-Max-Age", "86400") + w.Header().Set("Access-Control-Allow-Methods", "POST, GET, OPTIONS") + w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization") + w.Header().Add("Vary", "Origin") + + if r.Method == "OPTIONS" { + w.Header().Set("Access-Control-Max-Age", "86400") + w.WriteHeader(http.StatusOK) + return + } + + h.ServeHTTP(w, r) + }) +} + var ( metricWatchdogErrors = promauto.NewGauge(prometheus.GaugeOpts{ Name: "zoekt_webserver_watchdog_errors", @@ -693,18 +648,4 @@ var ( Name: "zoekt_search_requests_total", Help: "The total number of search requests that zoekt received", }) - - // serviceMetricsOnce returns a singleton instance of the server metrics - // that are shared across all gRPC servers that this process creates. - // - // This function panics if the metrics cannot be registered with the default - // Prometheus registry. 
- serverMetricsOnce = sync.OnceValue(func() *grpcprom.ServerMetrics { - serverMetrics := grpcprom.NewServerMetrics( - grpcprom.WithServerCounterOptions(), - grpcprom.WithServerHandlingTimeHistogram(), // record the overall response latency for a gRPC request) - ) - prometheus.DefaultRegisterer.MustRegister(serverMetrics) - return serverMetrics - }) ) diff --git a/cmd/zoekt-webserver/main_unix.go b/cmd/zoekt-webserver/main_unix.go deleted file mode 100644 index cdcc52a0d..000000000 --- a/cmd/zoekt-webserver/main_unix.go +++ /dev/null @@ -1,9 +0,0 @@ -//go:build !windows - -package main - -import ( - platform "golang.org/x/sys/unix" -) - -const PLATFORM_SIGTERM = platform.SIGTERM diff --git a/cmd/zoekt-webserver/main_unsupported.go b/cmd/zoekt-webserver/main_unsupported.go deleted file mode 100644 index c2e7002ef..000000000 --- a/cmd/zoekt-webserver/main_unsupported.go +++ /dev/null @@ -1,18 +0,0 @@ -//go:build !linux - -package main - -import ( - sglog "github.com/sourcegraph/log" -) - -func mustRegisterMemoryMapMetrics(logger sglog.Logger) { - // The memory map metrics are collected via /proc, which - // is only available on linux-based operating systems. - - // as far as I can tell, Windows does not have the same - // virtual memory statistics as Linux. 
- // For example, Windows does not have the concept of - // a count of memory maps, and a max number of memory maps - return -} diff --git a/cmd/zoekt-webserver/main_windows.go b/cmd/zoekt-webserver/main_windows.go deleted file mode 100644 index 0e2a1ed23..000000000 --- a/cmd/zoekt-webserver/main_windows.go +++ /dev/null @@ -1,7 +0,0 @@ -package main - -import ( - platform "golang.org/x/sys/windows" -) - -const PLATFORM_SIGTERM = platform.SIGTERM diff --git a/cmd/zoekt-webserver/main_linux.go b/cmd/zoekt-webserver/metrics.go similarity index 100% rename from cmd/zoekt-webserver/main_linux.go rename to cmd/zoekt-webserver/metrics.go diff --git a/cmd/zoekt/main.go b/cmd/zoekt/main.go index 86cb51b5d..77a5c0be2 100644 --- a/cmd/zoekt/main.go +++ b/cmd/zoekt/main.go @@ -12,11 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +// The 'zoekt' command supports searching over an index directory or shard. package main import ( "bytes" "context" + "encoding/json" "flag" "fmt" "io" @@ -28,9 +30,11 @@ import ( "time" "github.com/felixge/fgprof" + "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/query" - "github.com/sourcegraph/zoekt/shards" + "github.com/sourcegraph/zoekt/search" ) func displayMatches(files []zoekt.FileMatch, pat string, withRepo bool, list bool) { @@ -51,6 +55,13 @@ func displayMatches(files []zoekt.FileMatch, pat string, withRepo bool, list boo } } +func displayMatchesJSONL(files []zoekt.FileMatch) { + encoder := json.NewEncoder(os.Stdout) + for _, f := range files { + encoder.Encode(f) + } +} + func addTabIfNonEmpty(s string) string { if s != "" { return "\t" + s @@ -64,19 +75,19 @@ func loadShard(fn string, verbose bool) (zoekt.Searcher, error) { return nil, err } - iFile, err := zoekt.NewIndexFile(f) + iFile, err := index.NewIndexFile(f) if err != nil { return nil, err } - s, err := zoekt.NewSearcher(iFile) + s, err := 
index.NewSearcher(iFile) if err != nil { iFile.Close() return nil, fmt.Errorf("NewSearcher(%s): %v", fn, err) } if verbose { - repo, index, err := zoekt.ReadMetadata(iFile) + repo, index, err := index.ReadMetadata(iFile) if err != nil { iFile.Close() return nil, fmt.Errorf("ReadMetadata(%s): %v", fn, err) @@ -162,6 +173,7 @@ func main() { verbose := flag.Bool("v", false, "print some background data") withRepo := flag.Bool("r", false, "print the repo before the file name") list := flag.Bool("l", false, "print matching filenames only") + jsonl := flag.Bool("jsonl", false, "print results in jsonl format") sym := flag.Bool("sym", false, "do experimental symbol search") flag.Usage = func() { @@ -189,7 +201,7 @@ func main() { if *shard != "" { searcher, err = loadShard(*shard, *verbose) } else { - searcher, err = shards.NewDirectorySearcher(*index) + searcher, err = search.NewDirectorySearcher(*index) } if err != nil { @@ -226,7 +238,12 @@ func main() { sres, _ = searcher.Search(context.Background(), q, &sOpts) } - displayMatches(sres.Files, pat, *withRepo, *list) + if *jsonl { + displayMatchesJSONL(sres.Files) + } else { + displayMatches(sres.Files, pat, *withRepo, *list) + } + if *verbose { log.Printf("stats: %#v", sres.Stats) } diff --git a/ctag-overlay.nix b/ctag-overlay.nix new file mode 100644 index 000000000..e3d79d703 --- /dev/null +++ b/ctag-overlay.nix @@ -0,0 +1,17 @@ +self: super: rec { + my-universal-ctags = super.universal-ctags.overrideAttrs (old: rec { + version = "6.1.0"; + src = super.fetchFromGitHub { + owner = "universal-ctags"; + repo = "ctags"; + rev = "v${version}"; + sha256 = "sha256-f8+Ifjn7bhSYozOy7kn+zCLdHGrH3iFupHUZEGynz9Y="; + }; + # disable checks, else we get `make[1]: *** No rule to make target 'optlib/cmake.c'. 
Stop.` + doCheck = false; + checkFlags = [ ]; + }); + + # Skip building if same ctags version as registry + universal-ctags = if super.universal-ctags.version == my-universal-ctags.version then super.universal-ctags else my-universal-ctags; +} diff --git a/doc/design.md b/doc/design.md index df3bfeaae..b4d09d778 100644 --- a/doc/design.md +++ b/doc/design.md @@ -142,7 +142,8 @@ index shard are the following * branch masks * metadata (repository name, index format version, etc.) -In practice, the shard size is about 3x the corpus (size). +In practice, the shard size is about 3.5x the corpus size, composed of +original content, posting lists, and other metadata. The format uses uint32 for all offsets, so the total size of a shard should be below 4G. Given the size of the posting data, this caps @@ -179,6 +180,12 @@ For the latter, it is necessary to find symbol definitions and other sections within files on indexing. Several (imperfect) programs to do this already exist, eg. `ctags`. +Zoekt also supports an alternative BM25-based scoring algorithm that can be +enabled with `UseBM25Scoring`. When enabled, each match in a file is treated +as a term, and an approximation to BM25 is computed. This is useful for +multi-term queries, better handling of term frequency, and appropriate +document length normalization. + Query language -------------- diff --git a/doc/faq.md b/doc/faq.md index 56c85932f..a487d65e2 100644 --- a/doc/faq.md +++ b/doc/faq.md @@ -111,7 +111,9 @@ rudimentary support for filtering, and there is no symbol ranking. The search server should have local SSD to store the index file (which is 3.5x the corpus size), and have at least 20% more RAM than the -corpus size. +corpus size. For optimal performance with large codebases, consider +using machines with ample CPU cores, as search operations can be +parallelized across shards. ## Can I index multiple branches? 
diff --git a/doc/api.md b/doc/json-api.md similarity index 100% rename from doc/api.md rename to doc/json-api.md diff --git a/doc/query_syntax.md b/doc/query_syntax.md index dce64b633..0a5d8d667 100644 --- a/doc/query_syntax.md +++ b/doc/query_syntax.md @@ -2,13 +2,15 @@ This guide explains the Zoekt query language, used for searching text within Git repositories. Zoekt queries allow combining multiple filters and expressions using logical operators, negations, and grouping. Here's how to craft queries effectively. +For a brief overview of Zoekt's query syntax, see [these great docs from neogrok](https://neogrok-demo-web.fly.dev/syntax). + --- ## Syntax Overview A query is made up of expressions. An **expression** can be: - A negation (e.g., `-`), -- A field (e.g., `repo:`). +- A field (e.g., `repo:`), - A grouping (e.g., parentheses `()`), Logical `OR` operations combine multiple expressions. The **`AND` operator is implicit**, meaning multiple expressions written together will be automatically treated as `AND`. @@ -84,6 +86,38 @@ Use `or` to combine multiple expressions. --- +## Special Query Types + +### Filtering by Repository Type + +Zoekt supports filtering repositories by various attributes: + +```plaintext +public:yes archived:no fork:no +``` + +This finds repositories that are public, not archived, and not forks. + +### Result Type Control + +The `type:` operator controls what kind of results are returned: + +```plaintext +type:repo content:config +``` + +This returns repository names instead of file matches. Valid values include: +- `filematch` - Returns file content matches (default) +- `filename` - Returns only matching filenames +- `repo` - Returns only repository names + +`type:` applies to the whole expression in its current scope, including `or` +clauses. For example, `type:repo foo or bar` is equivalent to +`type:repo (foo or bar)`. Use parentheses to scope `type:` to only one branch, +for example `(type:repo foo) or bar`. 
+ +--- + ## Special Query Values - **Boolean Values**: @@ -109,6 +143,19 @@ Use `or` to combine multiple expressions. --- +## Case Sensitivity + +Zoekt supports three case sensitivity modes: + +- `case:yes` - Exact case matching +- `case:no` - Case-insensitive matching +- `case:auto` - Automatically detect based on pattern (default) + +In auto mode, if the pattern contains uppercase letters, the search will be +case-sensitive; otherwise, it will be case-insensitive. + +--- + ## Advanced Examples 1. **Search for content in Python files in public repositories**: diff --git a/flake.lock b/flake.lock index 656413665..04bfb7b15 100644 --- a/flake.lock +++ b/flake.lock @@ -2,11 +2,11 @@ "nodes": { "nixpkgs": { "locked": { - "lastModified": 1709702697, - "narHash": "sha256-Nkh5/A/5wt5maDiIzjl+j8/Fg2d0veRica1a38rW46c=", + "lastModified": 1736798957, + "narHash": "sha256-qwpCtZhSsSNQtK4xYGzMiyEDhkNzOCz/Vfu4oL2ETsQ=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "0d3d44dbdfb96440c17e11ebafd6d3ec2bd703c2", + "rev": "9abb87b552b7f55ac8916b6fc9e5cb486656a2f3", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index 08e1595c9..d33bdbede 100644 --- a/flake.nix +++ b/flake.nix @@ -16,22 +16,7 @@ overlays = [ self.overlays.ctags ]; }; in { default = import ./shell.nix { inherit pkgs; }; }); - # Pin a specific version of universal-ctags to the same version as in cmd/symbols/ctags-install-alpine.sh. - overlays.ctags = self: super: rec { - my-universal-ctags = super.universal-ctags.overrideAttrs (old: rec { - version = "6.1.0"; - src = super.fetchFromGitHub { - owner = "universal-ctags"; - repo = "ctags"; - rev = "v${version}"; - sha256 = "sha256-f8+Ifjn7bhSYozOy7kn+zCLdHGrH3iFupHUZEGynz9Y="; - }; - # disable checks, else we get `make[1]: *** No rule to make target 'optlib/cmake.c'. 
Stop.` - doCheck = false; - checkFlags = [ ]; - }); - # Skip building if same ctags version as registry - universal-ctags = if super.universal-ctags.version == my-universal-ctags.version then super.universal-ctags else my-universal-ctags; - }; + # Pin a specific version of universal-ctags to the same version as in ./install-ctags-alpine.sh. + overlays.ctags = import ./ctag-overlay.nix; }; } diff --git a/gitindex/catfile.go b/gitindex/catfile.go new file mode 100644 index 000000000..1933c337c --- /dev/null +++ b/gitindex/catfile.go @@ -0,0 +1,244 @@ +// Copyright 2016 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gitindex + +import ( + "bufio" + "bytes" + "encoding/hex" + "fmt" + "io" + "os/exec" + "strconv" + "sync" + "syscall" + + "github.com/go-git/go-git/v5/plumbing" +) + +type catfileReaderOptions struct { + filterSpec string +} + +// catfileReader provides streaming access to git blob objects via a pipelined +// "git cat-file --batch --buffer" process. A writer goroutine feeds all blob +// SHAs to stdin while the caller reads responses one at a time, similar to +// archive/tar.Reader. +// +// The --buffer flag switches git's output from per-object flush (write_or_die) +// to libc stdio buffering (fwrite), reducing syscalls. After stdin EOF, git +// calls fflush(stdout) to deliver any remaining output. 
+//
+// Usage:
+//
+//	cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
+//	if err != nil { ... }
+//	defer cr.Close()
+//
+//	for {
+//	    size, missing, excluded, err := cr.Next()
+//	    if err == io.EOF { break }
+//	    if err != nil { return err } // e.g. output truncated mid-entry
+//	    if missing { continue }
+//	    if excluded { continue }
+//	    if size > maxSize { continue } // unread bytes auto-skipped
+//	    content := make([]byte, size)
+//	    io.ReadFull(cr, content)
+//	}
+type catfileReader struct {
+	cmd      *exec.Cmd     // the running "git cat-file" process
+	reader   *bufio.Reader // buffered view of the process's stdout
+	writeErr <-chan error  // result of the stdin writer goroutine; closed when it exits
+
+	// pending tracks unread content bytes + trailing LF for the current
+	// entry. Next() discards any pending bytes before reading the next header.
+	pending int
+
+	closeOnce sync.Once
+	closeErr  error
+}
+
+// newCatfileReader starts a "git cat-file --batch --buffer" process and feeds
+// all ids to its stdin via a background goroutine. The caller must call Close
+// when done. Pass a zero-value catfileReaderOptions when no options are needed.
+//
+// A non-empty opts.filterSpec is forwarded to git as "--filter=<spec>".
+func newCatfileReader(repoDir string, ids []plumbing.Hash, opts catfileReaderOptions) (*catfileReader, error) {
+	args := []string{"cat-file", "--batch", "--buffer"}
+	if opts.filterSpec != "" {
+		args = append(args, "--filter="+opts.filterSpec)
+	}
+
+	cmd := exec.Command("git", args...)
+	cmd.Dir = repoDir
+
+	stdin, err := cmd.StdinPipe()
+	if err != nil {
+		return nil, fmt.Errorf("stdin pipe: %w", err)
+	}
+
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		stdin.Close()
+		return nil, fmt.Errorf("stdout pipe: %w", err)
+	}
+
+	if err := cmd.Start(); err != nil {
+		stdin.Close()
+		stdout.Close()
+		return nil, fmt.Errorf("start git cat-file: %w", err)
+	}
+
+	// Writer goroutine: feed all SHAs then close stdin to trigger flush.
+	// The channel is buffered so the goroutine never blocks on reporting its
+	// result, and it is closed on exit so Close can always receive from it.
+	writeErr := make(chan error, 1)
+	go func() {
+		defer close(writeErr)
+		defer stdin.Close()
+		bw := bufio.NewWriterSize(stdin, 64*1024)
+		// 40 hex digits followed by a newline, reused for every id.
+		var hexBuf [41]byte
+		hexBuf[40] = '\n'
+		for _, id := range ids {
+			hex.Encode(hexBuf[:40], id[:])
+			if _, err := bw.Write(hexBuf[:]); err != nil {
+				writeErr <- err
+				return
+			}
+		}
+		writeErr <- bw.Flush()
+	}()
+
+	return &catfileReader{
+		cmd:      cmd,
+		reader:   bufio.NewReaderSize(stdout, 512*1024),
+		writeErr: writeErr,
+	}, nil
+}
+
+// catfileUnexpectedEOF converts an io.EOF that was hit in the middle of an
+// entry into io.ErrUnexpectedEOF, so that a truncated git output stream is
+// never mistaken for the normal end of a blob or of the batch. Any other
+// error is returned unchanged.
+func catfileUnexpectedEOF(err error) error {
+	if err == io.EOF {
+		return io.ErrUnexpectedEOF
+	}
+	return err
+}
+
+// consumeTrailingLF reads the single LF byte that follows a blob's content
+// and marks the current entry as fully consumed.
+func (cr *catfileReader) consumeTrailingLF() error {
+	if _, err := cr.reader.ReadByte(); err != nil {
+		return fmt.Errorf("read trailing LF: %w", catfileUnexpectedEOF(err))
+	}
+	cr.pending = 0
+	return nil
+}
+
+// Next advances to the next blob entry. It returns the blob's size and whether
+// it is missing or excluded by the configured filter. Any unread content from
+// the previous entry is automatically discarded. Returns io.EOF when all
+// entries have been consumed.
+//
+// io.EOF is returned only when the stream ends exactly on an entry boundary.
+// If the stream ends inside an entry (while skipping unread content or in the
+// middle of a header line), the error wraps io.ErrUnexpectedEOF instead.
+//
+// After Next returns successfully with missing=false and excluded=false, call
+// Read to consume the blob content, or call Next again to skip it.
+func (cr *catfileReader) Next() (size int, missing bool, excluded bool, err error) {
+	// Discard unread content from the previous entry.
+	if cr.pending > 0 {
+		n, err := cr.reader.Discard(cr.pending)
+		// Account for a short discard so pending never overstates what is left.
+		cr.pending -= n
+		if err != nil {
+			return 0, false, false, fmt.Errorf("discard pending bytes: %w", catfileUnexpectedEOF(err))
+		}
+	}
+
+	headerBytes, err := cr.reader.ReadBytes('\n')
+	if err != nil {
+		if err == io.EOF {
+			if len(headerBytes) > 0 {
+				// The stream stopped part-way through a header line.
+				return 0, false, false, fmt.Errorf("read header %q: %w", headerBytes, io.ErrUnexpectedEOF)
+			}
+			return 0, false, false, io.EOF
+		}
+		return 0, false, false, fmt.Errorf("read header: %w", err)
+	}
+	header := headerBytes[:len(headerBytes)-1] // trim \n
+
+	if bytes.HasSuffix(header, []byte(" missing")) {
+		return 0, true, false, nil
+	}
+
+	if bytes.HasSuffix(header, []byte(" excluded")) {
+		return 0, false, true, nil
+	}
+
+	// The header has the form "OID TYPE SIZE"; the size is the last field.
+	lastSpace := bytes.LastIndexByte(header, ' ')
+	if lastSpace == -1 {
+		return 0, false, false, fmt.Errorf("unexpected header: %q", header)
+	}
+	size, err = strconv.Atoi(string(header[lastSpace+1:]))
+	if err != nil {
+		return 0, false, false, fmt.Errorf("parse size from %q: %w", header, err)
+	}
+	if size < 0 {
+		// A negative size would corrupt pending and panic in callers that
+		// allocate a buffer of that length.
+		return 0, false, false, fmt.Errorf("negative size in header %q", header)
+	}
+
+	// Track pending bytes: content + trailing LF.
+	cr.pending = size + 1
+	return size, false, false, nil
+}
+
+// Read reads from the current blob's content. Implements io.Reader. Returns
+// io.EOF when the blob's content has been fully read (the trailing LF
+// delimiter is consumed automatically).
+//
+// If the underlying stream ends before the announced number of content bytes
+// (or the trailing LF) has been delivered, Read returns io.ErrUnexpectedEOF
+// (possibly wrapped) rather than io.EOF, so callers such as io.Copy do not
+// treat a truncated blob as complete.
+func (cr *catfileReader) Read(p []byte) (int, error) {
+	if cr.pending <= 0 {
+		return 0, io.EOF
+	}
+
+	// Don't read into the trailing LF byte — reserve it.
+	contentRemaining := cr.pending - 1
+	if contentRemaining <= 0 {
+		// Only the trailing LF remains; consume it and signal EOF.
+		if err := cr.consumeTrailingLF(); err != nil {
+			return 0, err
+		}
+		return 0, io.EOF
+	}
+
+	// Limit the read to the remaining content bytes.
+	if len(p) > contentRemaining {
+		p = p[:contentRemaining]
+	}
+	n, err := cr.reader.Read(p)
+	cr.pending -= n
+	if err != nil {
+		// Content bytes are still owed, so EOF here means truncation.
+		return n, catfileUnexpectedEOF(err)
+	}
+
+	// If we've consumed all content bytes, also consume the trailing LF.
+	if cr.pending == 1 {
+		if err := cr.consumeTrailingLF(); err != nil {
+			return n, err
+		}
+	}
+
+	return n, nil
+}
+
+// Close shuts down the cat-file process and waits for it to exit.
+// It is safe to call Close multiple times or concurrently; every call returns
+// the result of the first one.
+func (cr *catfileReader) Close() error {
+	cr.closeOnce.Do(func() {
+		// Kill first to avoid blocking on drain when there are many
+		// unconsumed entries. Gitaly uses the same kill-first pattern.
+		_ = cr.cmd.Process.Kill()
+		// Drain any buffered stdout so the pipe closes cleanly.
+		// Must complete before cmd.Wait(), which closes the pipe.
+		_, _ = io.Copy(io.Discard, cr.reader)
+		// Wait for writer goroutine (unblocks via broken pipe from Kill).
+		<-cr.writeErr
+		err := cr.cmd.Wait()
+		// Suppress the expected "signal: killed" error from our own Kill().
+		if isKilledErr(err) {
+			err = nil
+		}
+		cr.closeErr = err
+	})
+	return cr.closeErr
+}
+
+// isKilledErr reports whether err is an exec.ExitError caused by SIGKILL.
+func isKilledErr(err error) bool {
+	exitErr, ok := err.(*exec.ExitError)
+	if !ok {
+		return false
+	}
+	ws, ok := exitErr.Sys().(syscall.WaitStatus)
+	return ok && ws.Signal() == syscall.SIGKILL
+}
diff --git a/gitindex/catfile_bench_test.go b/gitindex/catfile_bench_test.go
new file mode 100644
index 000000000..5b8897380
--- /dev/null
+++ b/gitindex/catfile_bench_test.go
@@ -0,0 +1,159 @@
+package gitindex
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"os"
+	"slices"
+	"strings"
+	"testing"
+
+	"github.com/go-git/go-git/v5/plumbing"
+)
+
+// Set ZOEKT_BENCH_REPO to a git checkout to enable these benchmarks.
+//
+//	git clone --depth=1 https://github.com/kubernetes/kubernetes /tmp/k8s
+//	ZOEKT_BENCH_REPO=/tmp/k8s go test ./gitindex/ -bench=BenchmarkBlobRead -benchmem -count=5 -timeout=600s
+
+// requireBenchGitRepo returns the directory named by ZOEKT_BENCH_REPO and
+// skips the benchmark when the variable is unset.
+func requireBenchGitRepo(b *testing.B) string {
+	b.Helper()
+	dir := os.Getenv("ZOEKT_BENCH_REPO")
+	if dir == "" {
+		b.Skip("ZOEKT_BENCH_REPO not set")
+	}
+	return dir
+}
+
+// collectBlobKeys opens the repo, walks HEAD, and returns all fileKeys with
+// their BlobLocations plus the repo directory path.
+func collectBlobKeys(b *testing.B, repoDir string) (map[fileKey]BlobLocation, string) {
+	b.Helper()
+
+	repo, closer, err := openRepo(repoDir)
+	if err != nil {
+		b.Fatalf("openRepo: %v", err)
+	}
+	b.Cleanup(func() { closer.Close() })
+
+	head, err := repo.Head()
+	if err != nil {
+		b.Fatalf("Head: %v", err)
+	}
+
+	commit, err := repo.CommitObject(head.Hash())
+	if err != nil {
+		b.Fatalf("CommitObject: %v", err)
+	}
+
+	tree, err := commit.Tree()
+	if err != nil {
+		b.Fatalf("Tree: %v", err)
+	}
+
+	rw := NewRepoWalker(repo, "https://example.com/repo", nil)
+	if _, err := rw.CollectFiles(tree, "HEAD", nil); err != nil {
+		b.Fatalf("CollectFiles: %v", err)
+	}
+
+	return rw.Files, repoDir
+}
+
+// sortedBlobKeys returns the keys of files in a stable order, so that the
+// keys[:n] prefixes used by the benchmarks select the same blobs on every run
+// (Go randomizes map iteration order). Keys are ordered by blob ID; keys that
+// share a blob ID are ordered by the Go-syntax rendering of the whole key.
+func sortedBlobKeys(files map[fileKey]BlobLocation) []fileKey {
+	keys := make([]fileKey, 0, len(files))
+	for k := range files {
+		keys = append(keys, k)
+	}
+	slices.SortFunc(keys, func(a, b fileKey) int {
+		if c := bytes.Compare(a.ID[:], b.ID[:]); c != 0 {
+			return c
+		}
+		// The same blob can be reachable under several keys; break the tie on
+		// the full key so the result never depends on map iteration order.
+		return strings.Compare(fmt.Sprintf("%#v", a), fmt.Sprintf("%#v", b))
+	})
+	return keys
+}
+
+// BenchmarkBlobRead_GoGit measures the current go-git BlobObject approach:
+// sequential calls to repo.GitRepo.BlobObject(hash) for each file.
+func BenchmarkBlobRead_GoGit(b *testing.B) {
+	files, _ := collectBlobKeys(b, requireBenchGitRepo(b))
+	keys := sortedBlobKeys(files)
+	b.Logf("collected %d blob keys", len(keys))
+
+	// Two fixed-size prefixes plus the whole key set; each size is clamped so
+	// repositories smaller than a threshold still run.
+	sizes := []int{1_000, 5_000, len(keys)}
+	for _, want := range sizes {
+		count := min(want, len(keys))
+		subset := keys[:count]
+
+		b.Run(fmt.Sprintf("files=%d", count), func(b *testing.B) {
+			b.ReportAllocs()
+			var totalBytes int64
+			for b.Loop() {
+				// Restart the tally so the metric reflects a single pass.
+				totalBytes = 0
+				for _, key := range subset {
+					loc := files[key]
+					blob, err := loc.GitRepo.BlobObject(key.ID)
+					if err != nil {
+						b.Fatalf("BlobObject(%s): %v", key.ID, err)
+					}
+					r, err := blob.Reader()
+					if err != nil {
+						b.Fatalf("Reader: %v", err)
+					}
+					copied, err := io.Copy(io.Discard, r)
+					r.Close()
+					if err != nil {
+						b.Fatalf("Read: %v", err)
+					}
+					totalBytes += copied
+				}
+			}
+			b.ReportMetric(float64(totalBytes), "content-bytes/op")
+			b.ReportMetric(float64(len(subset)), "files/op")
+		})
+	}
+}
+
+// BenchmarkBlobRead_CatfileReader measures the streaming catfileReader approach:
+// all SHAs written to stdin at once via --buffer, responses read one at a time.
+// This is the production path used by indexGitRepo.
+func BenchmarkBlobRead_CatfileReader(b *testing.B) {
+	files, gitDir := collectBlobKeys(b, requireBenchGitRepo(b))
+	keys := sortedBlobKeys(files)
+	b.Logf("collected %d blob keys", len(keys))
+
+	// The reader only needs blob IDs, in the same order as keys.
+	ids := make([]plumbing.Hash, 0, len(keys))
+	for _, k := range keys {
+		ids = append(ids, k.ID)
+	}
+
+	sizes := []int{1_000, 5_000, len(keys)}
+	for _, want := range sizes {
+		count := min(want, len(keys))
+		subset := ids[:count]
+
+		b.Run(fmt.Sprintf("files=%d", count), func(b *testing.B) {
+			b.ReportAllocs()
+			var totalBytes int64
+			for b.Loop() {
+				totalBytes = 0
+				cr, err := newCatfileReader(gitDir, subset, catfileReaderOptions{})
+				if err != nil {
+					b.Fatalf("newCatfileReader: %v", err)
+				}
+				// One response is expected per requested id.
+				for range subset {
+					size, missing, excluded, err := cr.Next()
+					if err != nil {
+						cr.Close()
+						b.Fatalf("Next: %v", err)
+					}
+					if missing || excluded {
+						continue
+					}
+					content := make([]byte, size)
+					_, err = io.ReadFull(cr, content)
+					if err != nil {
+						cr.Close()
+						b.Fatalf("ReadFull: %v", err)
+					}
+					totalBytes += int64(len(content))
+				}
+				cr.Close()
+			}
+			b.ReportMetric(float64(totalBytes), "content-bytes/op")
+			b.ReportMetric(float64(len(subset)), "files/op")
+		})
+	}
+}
diff --git a/gitindex/catfile_hardening_test.go b/gitindex/catfile_hardening_test.go
new file mode 100644
index 000000000..e1da3faf7
--- /dev/null
+++ b/gitindex/catfile_hardening_test.go
@@ -0,0 +1,870 @@
+package gitindex
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/go-git/go-git/v5/plumbing"
+)
+
+// --- Close lifecycle tests ---
+
+// TestCatfileReader_DoubleClose verifies that Close is idempotent.
+// Calling Close twice must not deadlock or panic.
+func TestCatfileReader_DoubleClose(t *testing.T) {
+	t.Parallel()
+
+	repoDir, blobs := createTestRepo(t)
+
+	cr, err := newCatfileReader(repoDir, []plumbing.Hash{blobs["hello.txt"]}, catfileReaderOptions{})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Step past the only entry so git has nothing left to emit.
+	_, _, _, err = cr.Next()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	err = cr.Close()
+	if err != nil {
+		t.Fatalf("first Close: %v", err)
+	}
+
+	// Run the second Close on its own goroutine so a hang shows up as a
+	// timeout instead of stalling the test binary.
+	second := make(chan error, 1)
+	go func() { second <- cr.Close() }()
+
+	timeout := time.After(5 * time.Second)
+	select {
+	case <-timeout:
+		t.Fatal("second Close() deadlocked — writeErr channel was never closed")
+	case <-second:
+		// Returned; the error value itself is not under test here.
+	}
+}
+
+// TestCatfileReader_ConcurrentClose verifies that calling Close from
+// multiple goroutines simultaneously does not panic, deadlock, or
+// corrupt state.
+func TestCatfileReader_ConcurrentClose(t *testing.T) {
+	t.Parallel()
+
+	repoDir, blobs := createTestRepo(t)
+
+	cr, err := newCatfileReader(repoDir, []plumbing.Hash{
+		blobs["hello.txt"],
+		blobs["large.bin"],
+		blobs["binary.bin"],
+	}, catfileReaderOptions{})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Consume one of the three entries; the other two stay unread.
+	_, _, _, err = cr.Next()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	const closers = 5
+	start := make(chan struct{})
+	var wg sync.WaitGroup
+	for i := 0; i < closers; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			<-start // released together below
+			cr.Close()
+		}()
+	}
+
+	finished := make(chan struct{})
+	go func() {
+		defer close(finished)
+		close(start)
+		wg.Wait()
+	}()
+
+	timeout := time.After(10 * time.Second)
+	select {
+	case <-timeout:
+		t.Fatal("concurrent Close() deadlocked")
+	case <-finished:
+		// Every closer returned.
+	}
+}
+
+// TestCatfileReader_CloseWithoutReading verifies that closing
+// immediately after creation (without reading any entries) completes
+// without hanging.
+func TestCatfileReader_CloseWithoutReading(t *testing.T) {
+	t.Parallel()
+
+	repoDir, blobs := createTestRepo(t)
+	ids := []plumbing.Hash{
+		blobs["hello.txt"],
+		blobs["large.bin"],
+		blobs["binary.bin"],
+		blobs["empty.txt"],
+	}
+
+	cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Close on a separate goroutine so a hang is reported as a timeout.
+	done := make(chan error, 1)
+	go func() {
+		done <- cr.Close()
+	}()
+
+	select {
+	case err := <-done:
+		if err != nil {
+			t.Fatalf("Close: %v", err)
+		}
+	case <-time.After(10 * time.Second):
+		t.Fatal("Close() without reading any entries hung")
+	}
+}
+
+// TestCatfileReader_CloseBeforeExhausted_ManyBlobs simulates early
+// termination (e.g., builder.Add error) with many unconsumed blobs.
+// Close should complete promptly — not drain the entire git output.
+func TestCatfileReader_CloseBeforeExhausted_ManyBlobs(t *testing.T) {
+	t.Parallel()
+
+	// Create enough blobs to make a draining Close noticeable without spending
+	// most of the test runtime on shelling out for fixture setup.
+	const (
+		blobCount = 128
+		blobSize  = 4 << 10
+	)
+	repoDir, ids := createManyBlobRepo(t, blobCount, blobSize)
+
+	cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Read only the first entry; every other entry stays unconsumed.
+	if _, _, _, err := cr.Next(); err != nil {
+		t.Fatal(err)
+	}
+
+	// Close should be fast (kill, not drain). With drain it still works but
+	// is slow — we enforce a generous bound.
+	start := time.Now()
+	done := make(chan error, 1)
+	go func() {
+		done <- cr.Close()
+	}()
+
+	select {
+	case <-done:
+		elapsed := time.Since(start)
+		// Killing is near-instant and even draining this fixture is quick, so
+		// the generous 3s bound only catches pathological stalls.
+		if elapsed > 3*time.Second {
+			t.Errorf("Close took %v after reading 1 of %d entries — consider killing instead of draining", elapsed, len(ids))
+		}
+	case <-time.After(30 * time.Second):
+		t.Fatal("Close() deadlocked with many unconsumed blobs")
+	}
+}
+
+// createManyBlobRepo initializes a git repository under a fresh temporary
+// directory, commits fileCount files of fileSize bytes each (file i is filled
+// with byte(i)), and returns the repository path together with the blob IDs
+// that "git ls-tree -r HEAD" reports, one per committed file.
+func createManyBlobRepo(t *testing.T, fileCount, fileSize int) (string, []plumbing.Hash) {
+	t.Helper()
+
+	dir := t.TempDir()
+	repoDir := filepath.Join(dir, "repo")
+
+	runGit(t, dir, "init", "-b", "main", "repo")
+
+	for i := 0; i < fileCount; i++ {
+		content := bytes.Repeat([]byte{byte(i)}, fileSize)
+		name := filepath.Join(repoDir, fmt.Sprintf("file_%03d.bin", i))
+		if err := os.WriteFile(name, content, 0o644); err != nil {
+			t.Fatalf("WriteFile(%q): %v", name, err)
+		}
+	}
+
+	runGit(t, repoDir, "add", ".")
+	runGit(t, repoDir, "commit", "-m", "many files")
+
+	// -z separates entries with NUL so paths never need unquoting.
+	out, err := exec.Command("git", "-C", repoDir, "ls-tree", "-r", "-z", "HEAD").Output()
+	if err != nil {
+		t.Fatalf("git ls-tree: %v", err)
+	}
+
+	ids := make([]plumbing.Hash, 0, fileCount)
+	for _, entry := range bytes.Split(out, []byte{0}) {
+		if len(entry) == 0 {
+			continue
+		}
+
+		// Each entry is "mode type oid<TAB>path"; the object ID is field 3.
+		fields := bytes.Fields(entry)
+		if len(fields) < 3 {
+			t.Fatalf("unexpected ls-tree entry %q", entry)
+		}
+
+		ids = append(ids, plumbing.NewHash(string(fields[2])))
+	}
+
+	if len(ids) != fileCount {
+		t.Fatalf("got %d blob IDs, want %d", len(ids), fileCount)
+	}
+
+	return repoDir, ids
+}
+
+// --- Read edge-case tests ---
+
+// TestCatfileReader_ReadWithoutNext verifies that calling Read
+// before calling Next returns io.EOF, not a panic or garbage data.
+func TestCatfileReader_ReadWithoutNext(t *testing.T) {
+	t.Parallel()
+
+	repoDir, blobs := createTestRepo(t)
+	ids := []plumbing.Hash{blobs["hello.txt"]}
+
+	cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cr.Close()
+
+	buf := make([]byte, 10)
+	n, err := cr.Read(buf)
+	if n != 0 || err != io.EOF {
+		t.Fatalf("Read without Next: n=%d err=%v, want n=0 err=io.EOF", n, err)
+	}
+}
+
+// TestCatfileReader_ReadAfterFullConsumption verifies that extra Read
+// calls after a blob is fully consumed return io.EOF, not duplicate
+// data or trailing LF bytes.
+func TestCatfileReader_ReadAfterFullConsumption(t *testing.T) {
+	t.Parallel()
+
+	repoDir, blobs := createTestRepo(t)
+	ids := []plumbing.Hash{blobs["hello.txt"]}
+
+	cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cr.Close()
+
+	// Fail here, not on a later size or content check, if the header is bad.
+	size, _, _, err := cr.Next()
+	if err != nil {
+		t.Fatalf("Next: %v", err)
+	}
+	content := make([]byte, size)
+	if _, err := io.ReadFull(cr, content); err != nil {
+		t.Fatal(err)
+	}
+
+	// Blob is fully read — additional Reads must return EOF.
+	for i := 0; i < 3; i++ {
+		buf := make([]byte, 10)
+		n, err := cr.Read(buf)
+		if n != 0 || err != io.EOF {
+			t.Fatalf("Read #%d after full consumption: n=%d err=%v, want n=0 err=io.EOF", i, n, err)
+		}
+	}
+}
+
+// TestCatfileReader_SmallBufferReads reads a blob one byte at a time
+// and verifies the entire content is reconstructed correctly without
+// any trailing LF leaking into user content.
+func TestCatfileReader_SmallBufferReads(t *testing.T) {
+	t.Parallel()
+
+	repoDir, blobs := createTestRepo(t)
+	ids := []plumbing.Hash{blobs["hello.txt"]}
+
+	cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cr.Close()
+
+	size, _, _, err := cr.Next()
+	if err != nil {
+		t.Fatalf("Next: %v", err)
+	}
+
+	var result []byte
+	buf := make([]byte, 1)
+	for {
+		n, err := cr.Read(buf)
+		if n > 0 {
+			result = append(result, buf[:n]...)
+		}
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	if len(result) != size {
+		t.Fatalf("read %d bytes, want %d", len(result), size)
+	}
+	if string(result) != "hello world\n" {
+		t.Errorf("content = %q, want %q", result, "hello world\n")
+	}
+}
+
+// TestCatfileReader_PartialReadThenNext reads only part of a blob's
+// content, then advances to the next entry. Verifies that the discard
+// of pending bytes doesn't corrupt the stream.
+func TestCatfileReader_PartialReadThenNext(t *testing.T) {
+	t.Parallel()
+
+	repoDir, blobs := createTestRepo(t)
+	ids := []plumbing.Hash{
+		blobs["hello.txt"],  // 12 bytes: "hello world\n"
+		blobs["binary.bin"], // variable, starts with 0x00
+	}
+
+	cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cr.Close()
+
+	// Read only 5 of 12 bytes from hello.txt.
+	size, _, _, err := cr.Next()
+	if err != nil {
+		t.Fatalf("Next hello.txt: %v", err)
+	}
+	if size != 12 {
+		t.Fatalf("hello.txt size = %d, want 12", size)
+	}
+	partial := make([]byte, 5)
+	if _, err := io.ReadFull(cr, partial); err != nil {
+		t.Fatal(err)
+	}
+	if string(partial) != "hello" {
+		t.Fatalf("partial = %q, want %q", partial, "hello")
+	}
+
+	// Advance — must discard remaining 7 content bytes + trailing LF.
+	size, _, _, err = cr.Next()
+	if err != nil {
+		t.Fatalf("Next binary.bin after partial read: %v", err)
+	}
+
+	// Verify binary.bin content is intact.
+	content := make([]byte, size)
+	if _, err := io.ReadFull(cr, content); err != nil {
+		t.Fatal(err)
+	}
+	if len(content) == 0 {
+		t.Fatal("binary.bin is empty after partial-read skip")
+	}
+	if content[0] != 0x00 {
+		t.Errorf("binary.bin first byte = 0x%02x after partial-read skip, want 0x00", content[0])
+	}
+}
+
+// TestCatfileReader_PartialReadExactlyOneByteShort reads size-1 bytes
+// from a blob. The pending field should be exactly 2 (1 content byte +
+// 1 trailing LF). This stresses the boundary between content and LF
+// in the discard path.
+func TestCatfileReader_PartialReadExactlyOneByteShort(t *testing.T) {
+	t.Parallel()
+
+	repoDir, blobs := createTestRepo(t)
+	ids := []plumbing.Hash{
+		blobs["hello.txt"],  // 12 bytes
+		blobs["binary.bin"], // starts with 0x00
+	}
+
+	cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cr.Close()
+
+	// Check the header before using size: a failed Next returns size 0 and
+	// make([]byte, size-1) would then panic with a negative length.
+	size, _, _, err := cr.Next()
+	if err != nil {
+		t.Fatalf("Next hello.txt: %v", err)
+	}
+	if size < 1 {
+		t.Fatalf("hello.txt size = %d, want at least 1", size)
+	}
+	// Read exactly size-1 bytes — leaves 1 content byte + trailing LF.
+	buf := make([]byte, size-1)
+	if _, err := io.ReadFull(cr, buf); err != nil {
+		t.Fatal(err)
+	}
+	if string(buf) != "hello world" { // missing final \n
+		t.Fatalf("partial = %q", buf)
+	}
+
+	// Advance — pending should be 2 (1 content byte + 1 LF). The
+	// Discard call must handle this exact boundary correctly.
+	size, missing, excluded, err := cr.Next()
+	if err != nil {
+		t.Fatalf("Next after size-1 partial read: %v", err)
+	}
+	if missing || excluded {
+		t.Fatal("binary.bin unexpectedly missing")
+	}
+
+	// Read binary.bin to verify stream integrity.
+	content := make([]byte, size)
+	if _, err := io.ReadFull(cr, content); err != nil {
+		t.Fatal(err)
+	}
+	if len(content) == 0 {
+		t.Fatal("binary.bin is empty after boundary skip")
+	}
+	if content[0] != 0x00 {
+		t.Errorf("binary.bin[0] = 0x%02x after boundary skip, want 0x00", content[0])
+	}
+}
+
+// --- Empty / degenerate input tests ---
+
+// TestCatfileReader_EmptyIds verifies that an empty id slice produces
+// immediate EOF without errors.
+func TestCatfileReader_EmptyIds(t *testing.T) {
+	t.Parallel()
+
+	repoDir, _ := createTestRepo(t)
+
+	cr, err := newCatfileReader(repoDir, nil, catfileReaderOptions{})
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cr.Close()
+
+	_, _, _, err = cr.Next()
+	if err != io.EOF {
+		t.Fatalf("expected io.EOF for empty ids, got %v", err)
+	}
+}
+
+// TestCatfileReader_MultipleEmptyBlobs stresses the trailing-LF
+// handling for size-0 blobs. Git still outputs a LF after a 0-byte
+// blob body. Repeated empty blobs test the pending=1 discard path.
+func TestCatfileReader_MultipleEmptyBlobs(t *testing.T) {
+	t.Parallel()
+
+	repoDir, blobs := createTestRepo(t)
+
+	// Send the empty blob SHA 5 times — git outputs each independently.
+	emptyID := blobs["empty.txt"]
+	ids := []plumbing.Hash{emptyID, emptyID, emptyID, emptyID, emptyID}
+
+	cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cr.Close()
+
+	for i := range ids {
+		size, missing, excluded, err := cr.Next()
+		if err != nil {
+			t.Fatalf("Next #%d: %v", i, err)
+		}
+		if missing || excluded {
+			t.Fatalf("#%d unexpectedly missing", i)
+		}
+		if size != 0 {
+			t.Fatalf("#%d size = %d, want 0", i, size)
+		}
+		// Don't read — Next should discard the trailing LF for us.
+	}
+
+	_, _, _, err = cr.Next()
+	if err != io.EOF {
+		t.Fatalf("expected EOF after %d empty blobs, got %v", len(ids), err)
+	}
+}
+
+// TestCatfileReader_EmptyBlobRead verifies that reading a 0-byte blob
+// through the io.Reader interface returns 0 bytes and io.EOF, and that
+// the trailing LF is consumed transparently.
+func TestCatfileReader_EmptyBlobRead(t *testing.T) {
+	t.Parallel()
+
+	repoDir, blobs := createTestRepo(t)
+	ids := []plumbing.Hash{
+		blobs["empty.txt"], // 0 bytes
+		blobs["hello.txt"], // 12 bytes — sentinel
+	}
+
+	cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cr.Close()
+
+	// A failed Next also reports size 0, so the error must be checked before
+	// the size assertion can mean anything.
+	size, _, _, err := cr.Next()
+	if err != nil {
+		t.Fatalf("Next empty.txt: %v", err)
+	}
+	if size != 0 {
+		t.Fatalf("empty.txt size = %d", size)
+	}
+
+	// Explicitly Read on the 0-byte blob.
+	buf := make([]byte, 10)
+	n, err := cr.Read(buf)
+	if n != 0 || err != io.EOF {
+		t.Fatalf("Read empty blob: n=%d err=%v, want n=0 err=io.EOF", n, err)
+	}
+
+	// The trailing LF must have been consumed. Verify by reading the
+	// next entry — if the LF leaked, the header parse would fail.
+	size, _, _, err = cr.Next()
+	if err != nil {
+		t.Fatalf("Next hello.txt after empty blob Read: %v", err)
+	}
+	if size != 12 {
+		t.Fatalf("hello.txt size = %d, want 12", size)
+	}
+	content := make([]byte, size)
+	if _, err := io.ReadFull(cr, content); err != nil {
+		t.Fatal(err)
+	}
+	if string(content) != "hello world\n" {
+		t.Errorf("hello.txt = %q", content)
+	}
+}
+
+// --- Missing object edge cases ---
+
+// TestCatfileReader_AllMissing verifies that a sequence of entirely
+// missing objects is handled gracefully — no errors, no panics, just
+// missing=true for each followed by EOF.
+func TestCatfileReader_AllMissing(t *testing.T) {
+	t.Parallel()
+
+	repoDir, _ := createTestRepo(t)
+
+	ids := []plumbing.Hash{
+		plumbing.NewHash("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef"),
+		plumbing.NewHash("1111111111111111111111111111111111111111"),
+		plumbing.NewHash("2222222222222222222222222222222222222222"),
+	}
+
+	cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cr.Close()
+
+	for i, id := range ids {
+		_, missing, excluded, err := cr.Next()
+		if err != nil {
+			t.Fatalf("Next #%d (%s): %v", i, id, err)
+		}
+		if excluded {
+			t.Errorf("expected #%d (%s) to be missing, not excluded", i, id)
+		}
+		if !missing {
+			t.Errorf("expected #%d (%s) to be missing", i, id)
+		}
+	}
+
+	_, _, _, err = cr.Next()
+	if err != io.EOF {
+		t.Fatalf("expected EOF after all missing, got %v", err)
+	}
+}
+
+// TestCatfileReader_AlternatingMissingPresent interleaves missing and
+// present objects, verifying that stream alignment is maintained.
+func TestCatfileReader_AlternatingMissingPresent(t *testing.T) {
+	t.Parallel()
+
+	repoDir, blobs := createTestRepo(t)
+
+	fake1 := plumbing.NewHash("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef")
+	fake2 := plumbing.NewHash("1111111111111111111111111111111111111111")
+
+	ids := []plumbing.Hash{
+		fake1,
+		blobs["hello.txt"],
+		fake2,
+		blobs["empty.txt"],
+		blobs["binary.bin"],
+	}
+
+	cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cr.Close()
+
+	// fake1 — missing
+	_, missing, excluded, err := cr.Next()
+	if err != nil || !missing || excluded {
+		t.Fatalf("fake1: err=%v missing=%v excluded=%v", err, missing, excluded)
+	}
+
+	// hello.txt — present, read it
+	size, missing, excluded, err := cr.Next()
+	if err != nil || missing || excluded {
+		t.Fatalf("hello.txt: err=%v missing=%v excluded=%v", err, missing, excluded)
+	}
+	content := make([]byte, size)
+	if _, err := io.ReadFull(cr, content); err != nil {
+		t.Fatal(err)
+	}
+	if string(content) != "hello world\n" {
+		t.Errorf("hello.txt = %q", content)
+	}
+
+	// fake2 — missing
+	_, missing, excluded, err = cr.Next()
+	if err != nil || !missing || excluded {
+		t.Fatalf("fake2: err=%v missing=%v excluded=%v", err, missing, excluded)
+	}
+
+	// empty.txt — present, skip it
+	size, missing, excluded, err = cr.Next()
+	if err != nil || missing || excluded {
+		t.Fatalf("empty.txt: err=%v missing=%v excluded=%v", err, missing, excluded)
+	}
+	if size != 0 {
+		t.Errorf("empty.txt size = %d", size)
+	}
+
+	// binary.bin — present, read it
+	size, missing, excluded, err = cr.Next()
+	if err != nil || missing || excluded {
+		t.Fatalf("binary.bin: err=%v missing=%v excluded=%v", err, missing, excluded)
+	}
+	binContent := make([]byte, size)
+	if _, err := io.ReadFull(cr, binContent); err != nil {
+		t.Fatal(err)
+	}
+	// Guard the index so an unexpectedly empty blob fails instead of panicking.
+	if len(binContent) == 0 {
+		t.Fatal("binary.bin is empty")
+	}
+	if binContent[0] != 0x00 {
+		t.Errorf("binary.bin[0] = 0x%02x, want 0x00", binContent[0])
+	}
+
+	_, _, _, err = cr.Next()
+	if err != io.EOF {
+		t.Fatalf("expected EOF, got %v", err)
+	}
+}
+
+// TestCatfileReader_MissingThenSkip verifies that a missing object
+// followed by a present but skipped (unread) object doesn't corrupt
+// the stream. Missing objects have no content body, so there must be
+// no stale pending bytes interfering with the next header read.
+func TestCatfileReader_MissingThenSkip(t *testing.T) {
+	t.Parallel()
+
+	repoDir, blobs := createTestRepo(t)
+
+	fake := plumbing.NewHash("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef")
+	ids := []plumbing.Hash{
+		fake,
+		blobs["large.bin"], // 64KB — skip without reading
+		blobs["hello.txt"], // sentinel — read to verify integrity
+	}
+
+	cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{})
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cr.Close()
+
+	// missing
+	_, missing, excluded, err := cr.Next()
+	if err != nil {
+		t.Fatalf("Next fake: %v", err)
+	}
+	if !missing || excluded {
+		t.Fatal("expected missing")
+	}
+
+	// large.bin — skip
+	size, missing, excluded, err := cr.Next()
+	if err != nil || missing || excluded {
+		t.Fatalf("large.bin: err=%v missing=%v excluded=%v", err, missing, excluded)
+	}
+	if size != 64*1024 {
+		t.Fatalf("large.bin size = %d", size)
+	}
+	// deliberately don't read
+
+	// hello.txt — read after missing+skip
+	size, missing, excluded, err = cr.Next()
+	if err != nil || missing || excluded {
+		t.Fatalf("hello.txt: err=%v missing=%v excluded=%v", err, missing, excluded)
+	}
+	content := make([]byte, size)
+	if _, err := io.ReadFull(cr, content); err != nil {
+		t.Fatal(err)
+	}
+	if string(content) != "hello world\n" {
+		t.Errorf("hello.txt = %q", content)
+	}
+}
+
+// --- Next() edge cases ---
+
+// TestCatfileReader_RepeatedNextAfterEOF verifies that calling Next
+// after EOF keeps returning EOF — not a panic, not a different error.
+func TestCatfileReader_RepeatedNextAfterEOF(t *testing.T) { + t.Parallel() + + repoDir, blobs := createTestRepo(t) + ids := []plumbing.Hash{blobs["hello.txt"]} + + cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{}) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + // Consume and skip the only entry. + if _, _, _, err := cr.Next(); err != nil { + t.Fatal(err) + } + + // First EOF. + _, _, _, err = cr.Next() + if err != io.EOF { + t.Fatalf("first post-exhaust Next: %v, want io.EOF", err) + } + + // Second and third EOF — must be stable. + for i := 0; i < 2; i++ { + _, _, _, err = cr.Next() + if err != io.EOF { + t.Fatalf("Next #%d after EOF: %v, want io.EOF", i+2, err) + } + } +} + +// --- Large blob precision tests --- + +// TestCatfileReader_LargeBlobBytePrecision verifies that a 64KB blob +// is read with byte-exact precision — no off-by-one from trailing LF +// handling, no truncation, no extra bytes. +func TestCatfileReader_LargeBlobBytePrecision(t *testing.T) { + t.Parallel() + + repoDir, blobs := createTestRepo(t) + ids := []plumbing.Hash{blobs["large.bin"]} + + cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{}) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + size, _, _, err := cr.Next() + if err != nil { + t.Fatal(err) + } + if size != 64*1024 { + t.Fatalf("size = %d, want %d", size, 64*1024) + } + + // Read the full blob content. + content := make([]byte, size) + n, err := io.ReadFull(cr, content) + if err != nil { + t.Fatalf("ReadFull: %v (read %d of %d)", err, n, size) + } + if n != size { + t.Fatalf("read %d bytes, want %d", n, size) + } + + // Verify git agrees on the content via cat-file -p. 
+ expected, err := exec.Command("git", "-C", repoDir, "cat-file", "-p", blobs["large.bin"].String()).Output() + if err != nil { + t.Fatalf("git cat-file -p: %v", err) + } + if !bytes.Equal(content, expected) { + t.Errorf("content mismatch: got %d bytes, git says %d bytes", len(content), len(expected)) + // Find first divergence. + for i := range content { + if i >= len(expected) || content[i] != expected[i] { + t.Errorf("first diff at byte %d: got 0x%02x, want 0x%02x", i, content[i], expected[i]) + break + } + } + } +} + +// TestCatfileReader_LargeBlobChunkedRead reads a 64KB blob in 997-byte +// chunks (a prime number that doesn't align with any power-of-2 buffer) +// to verify no byte is lost or duplicated across read boundaries. +func TestCatfileReader_LargeBlobChunkedRead(t *testing.T) { + t.Parallel() + + repoDir, blobs := createTestRepo(t) + ids := []plumbing.Hash{blobs["large.bin"]} + + cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{}) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + size, _, _, _ := cr.Next() + if size != 64*1024 { + t.Fatalf("size = %d", size) + } + + var result bytes.Buffer + buf := make([]byte, 997) // prime-sized chunks + for { + n, err := cr.Read(buf) + if n > 0 { + result.Write(buf[:n]) + } + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + } + + if result.Len() != size { + t.Fatalf("total read = %d, want %d", result.Len(), size) + } + + // Cross-check with git. + expected, _ := exec.Command("git", "-C", repoDir, "cat-file", "-p", blobs["large.bin"].String()).Output() + if !bytes.Equal(result.Bytes(), expected) { + t.Error("chunked read content differs from git cat-file -p output") + } +} + +// --- Duplicate SHA test --- + +// TestCatfileReader_DuplicateSHAs verifies that requesting the same +// SHA multiple times works — git cat-file --batch outputs the object +// for each request independently. 
+func TestCatfileReader_DuplicateSHAs(t *testing.T) { + t.Parallel() + + repoDir, blobs := createTestRepo(t) + + sha := blobs["hello.txt"] + ids := []plumbing.Hash{sha, sha, sha} + + cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{}) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + for i := 0; i < 3; i++ { + size, missing, excluded, err := cr.Next() + if err != nil { + t.Fatalf("Next #%d: %v", i, err) + } + if missing || excluded { + t.Fatalf("#%d unexpectedly missing", i) + } + if size != 12 { + t.Fatalf("#%d size = %d, want 12", i, size) + } + content := make([]byte, size) + if _, err := io.ReadFull(cr, content); err != nil { + t.Fatal(err) + } + if string(content) != "hello world\n" { + t.Errorf("#%d content = %q", i, content) + } + } + + _, _, _, err = cr.Next() + if err != io.EOF { + t.Fatalf("expected EOF, got %v", err) + } +} diff --git a/gitindex/catfile_test.go b/gitindex/catfile_test.go new file mode 100644 index 000000000..161141273 --- /dev/null +++ b/gitindex/catfile_test.go @@ -0,0 +1,286 @@ +package gitindex + +import ( + "bytes" + "io" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/go-git/go-git/v5/plumbing" +) + +// createTestRepo creates a git repo with various test files and returns +// the repo path and a map of filename -> blob SHA. 
+func createTestRepo(t *testing.T) (string, map[string]plumbing.Hash) { + t.Helper() + dir := t.TempDir() + repoDir := filepath.Join(dir, "repo") + + runGit(t, dir, "init", "-b", "main", "repo") + + files := []struct { + name string + content []byte + }{ + {name: "hello.txt", content: []byte("hello world\n")}, + {name: "empty.txt"}, + {name: "binary.bin", content: []byte{0x00, 0x01, 0x02, '\n', 'h', 'e', 'l', 'l', 'o', '\n', 'w', 'o', 'r', 'l', 'd', '\n', 0x03, 0x04}}, + {name: "large.bin", content: bytes.Repeat([]byte("0123456789abcdef"), 4096)}, + } + + for _, file := range files { + if err := os.WriteFile(filepath.Join(repoDir, file.name), file.content, 0o644); err != nil { + t.Fatalf("WriteFile(%q): %v", file.name, err) + } + } + + runGit(t, repoDir, "add", ".") + runGit(t, repoDir, "commit", "-m", "initial") + + // Get blob SHAs for each file. + blobs := map[string]plumbing.Hash{} + for _, file := range files { + name := file.name + out, err := exec.Command("git", "-C", repoDir, "rev-parse", "HEAD:"+name).Output() + if err != nil { + t.Fatalf("rev-parse %s: %v", name, err) + } + sha := strings.TrimSpace(string(out)) + blobs[name] = plumbing.NewHash(sha) + } + + return repoDir, blobs +} + +func TestCatfileReader(t *testing.T) { + t.Parallel() + + repoDir, blobs := createTestRepo(t) + + ids := []plumbing.Hash{ + blobs["hello.txt"], + blobs["empty.txt"], + blobs["binary.bin"], + blobs["large.bin"], + } + + cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{}) + if err != nil { + t.Fatalf("newCatfileReader: %v", err) + } + defer cr.Close() + + // hello.txt + size, missing, excluded, err := cr.Next() + if err != nil { + t.Fatalf("Next hello.txt: %v", err) + } + if missing || excluded { + t.Fatal("hello.txt unexpectedly missing") + } + if size != 12 { + t.Errorf("hello.txt size = %d, want 12", size) + } + content := make([]byte, size) + if _, err := io.ReadFull(cr, content); err != nil { + t.Fatalf("ReadFull hello.txt: %v", err) + } + if string(content) 
!= "hello world\n" { + t.Errorf("hello.txt content = %q", content) + } + + // empty.txt + size, missing, excluded, err = cr.Next() + if err != nil { + t.Fatalf("Next empty.txt: %v", err) + } + if missing || excluded { + t.Fatal("empty.txt unexpectedly missing") + } + if size != 0 { + t.Errorf("empty.txt size = %d, want 0", size) + } + + // binary.bin — read content and verify binary data survives. + size, missing, excluded, err = cr.Next() + if err != nil { + t.Fatalf("Next binary.bin: %v", err) + } + if missing || excluded { + t.Fatal("binary.bin unexpectedly missing") + } + binContent := make([]byte, size) + if _, err := io.ReadFull(cr, binContent); err != nil { + t.Fatalf("ReadFull binary.bin: %v", err) + } + if binContent[0] != 0x00 || binContent[3] != '\n' { + t.Errorf("binary.bin unexpected leading bytes: %x", binContent[:5]) + } + + // large.bin + size, missing, excluded, err = cr.Next() + if err != nil { + t.Fatalf("Next large.bin: %v", err) + } + if missing || excluded { + t.Fatal("large.bin unexpectedly missing") + } + if size != 64*1024 { + t.Errorf("large.bin size = %d, want %d", size, 64*1024) + } + largeContent := make([]byte, size) + if _, err := io.ReadFull(cr, largeContent); err != nil { + t.Fatalf("ReadFull large.bin: %v", err) + } + + // EOF after all entries. + _, _, _, err = cr.Next() + if err != io.EOF { + t.Errorf("expected io.EOF after last entry, got %v", err) + } +} + +func TestCatfileReader_Skip(t *testing.T) { + t.Parallel() + + repoDir, blobs := createTestRepo(t) + + ids := []plumbing.Hash{ + blobs["hello.txt"], + blobs["large.bin"], + blobs["binary.bin"], + } + + cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{}) + if err != nil { + t.Fatalf("newCatfileReader: %v", err) + } + defer cr.Close() + + // Skip hello.txt by calling Next again without reading. + _, _, _, err = cr.Next() + if err != nil { + t.Fatalf("Next hello.txt: %v", err) + } + + // Skip large.bin too. 
+ size, _, _, err := cr.Next() + if err != nil { + t.Fatalf("Next large.bin: %v", err) + } + if size != 64*1024 { + t.Errorf("large.bin size = %d, want %d", size, 64*1024) + } + + // Read binary.bin after skipping two entries. + size, _, _, err = cr.Next() + if err != nil { + t.Fatalf("Next binary.bin: %v", err) + } + content := make([]byte, size) + if _, err := io.ReadFull(cr, content); err != nil { + t.Fatalf("ReadFull binary.bin: %v", err) + } + if content[0] != 0x00 { + t.Errorf("binary.bin first byte = %x, want 0x00", content[0]) + } +} + +func TestCatfileReader_Missing(t *testing.T) { + t.Parallel() + + repoDir, blobs := createTestRepo(t) + + fakeHash := plumbing.NewHash("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef") + ids := []plumbing.Hash{ + blobs["hello.txt"], + fakeHash, + blobs["empty.txt"], + } + + cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{}) + if err != nil { + t.Fatalf("newCatfileReader: %v", err) + } + defer cr.Close() + + // hello.txt — read normally. + size, missing, excluded, err := cr.Next() + if err != nil || missing || excluded { + t.Fatalf("Next hello.txt: err=%v missing=%v excluded=%v", err, missing, excluded) + } + content := make([]byte, size) + if _, err := io.ReadFull(cr, content); err != nil { + t.Fatalf("ReadFull hello.txt: %v", err) + } + if string(content) != "hello world\n" { + t.Errorf("hello.txt = %q", content) + } + + // fakeHash — missing. + _, missing, excluded, err = cr.Next() + if err != nil { + t.Fatalf("Next fakeHash: %v", err) + } + if excluded { + t.Error("expected fakeHash to be missing, not excluded") + } + if !missing { + t.Error("expected fakeHash to be missing") + } + + // empty.txt — still works after missing entry. 
+ size, missing, excluded, err = cr.Next() + if err != nil || missing || excluded { + t.Fatalf("Next empty.txt: err=%v missing=%v excluded=%v", err, missing, excluded) + } + if size != 0 { + t.Errorf("empty.txt size = %d, want 0", size) + } +} + +func TestCatfileReader_Excluded(t *testing.T) { + t.Parallel() + + repoDir, blobs := createTestRepo(t) + + ids := []plumbing.Hash{ + blobs["large.bin"], + blobs["hello.txt"], + } + + cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{filterSpec: "blob:limit=1k"}) + if err != nil { + t.Fatalf("newCatfileReader: %v", err) + } + defer cr.Close() + + _, missing, excluded, err := cr.Next() + if err != nil { + t.Fatalf("Next large.bin: %v", err) + } + if missing { + t.Fatal("large.bin unexpectedly missing") + } + if !excluded { + t.Fatal("large.bin unexpectedly included") + } + + size, missing, excluded, err := cr.Next() + if err != nil { + t.Fatalf("Next hello.txt: %v", err) + } + if missing || excluded { + t.Fatalf("hello.txt unexpectedly skipped: missing=%v excluded=%v", missing, excluded) + } + content := make([]byte, size) + if _, err := io.ReadFull(cr, content); err != nil { + t.Fatalf("ReadFull hello.txt: %v", err) + } + if string(content) != "hello world\n" { + t.Errorf("hello.txt = %q", content) + } +} diff --git a/gitindex/clone.go b/gitindex/clone.go index 8bee54de0..f99f2f2d9 100644 --- a/gitindex/clone.go +++ b/gitindex/clone.go @@ -18,37 +18,14 @@ import ( "bytes" "fmt" "log" - "maps" "os" "os/exec" "path/filepath" - "sort" git "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/config" ) -// Updates the zoekt.* git config options after a repo is cloned. -// Once a repo is cloned, we can no longer use the --config flag to update all -// of it's zoekt.* settings at once. `git config` is limited to one option at once. 
-func updateZoektGitConfig(repoDest string, settings map[string]string) error { - var keys []string - for k := range settings { - keys = append(keys, k) - } - sort.Strings(keys) - - for _, k := range keys { - if settings[k] != "" { - if err := exec.Command("git", "-C", repoDest, "config", k, settings[k]).Run(); err != nil { - return err - } - } - } - - return nil -} - // CloneRepo clones one repository, adding the given config // settings. It returns the bare repo directory. The `name` argument // determines where the repo is stored relative to `destDir`. Returns @@ -61,32 +38,21 @@ func CloneRepo(destDir, name, cloneURL string, settings map[string]string) (stri repoDest := filepath.Join(parent, filepath.Base(name)+".git") if _, err := os.Lstat(repoDest); err == nil { - // Repository exists, ensure settings are in sync including the clone URL - settings := maps.Clone(settings) - settings["remote.origin.url"] = cloneURL - if err := updateZoektGitConfig(repoDest, settings); err != nil { + // Repository exists, ensure zoekt settings are in sync. + hadUpdate, err := updateZoektGitConfig(repoDest, settings) + if err != nil { return "", fmt.Errorf("failed to update repository settings: %w", err) } - return "", nil - } - - var keys []string - for k := range settings { - keys = append(keys, k) - } - sort.Strings(keys) - - var config []string - for _, k := range keys { - if settings[k] != "" { - config = append(config, "--config", k+"="+settings[k]) + if hadUpdate { + return repoDest, nil } + return "", nil } cmd := exec.Command( "git", "clone", "--bare", "--verbose", "--progress", ) - cmd.Args = append(cmd.Args, config...) + cmd.Args = append(cmd.Args, cloneConfigArgs(settings)...) 
cmd.Args = append(cmd.Args, cloneURL, repoDest) // Prevent prompting diff --git a/gitindex/clone_config.go b/gitindex/clone_config.go new file mode 100644 index 000000000..d178e2f3e --- /dev/null +++ b/gitindex/clone_config.go @@ -0,0 +1,93 @@ +package gitindex + +import ( + "bytes" + "errors" + "fmt" + "maps" + "os/exec" + "slices" + "strings" +) + +func sortedKeys(settings map[string]string) []string { + return slices.Sorted(maps.Keys(settings)) +} + +func cloneConfigArgs(settings map[string]string) []string { + args := make([]string, 0, len(settings)*2) + for _, key := range sortedKeys(settings) { + if value := settings[key]; value != "" { + args = append(args, "--config", key+"="+value) + } + } + return args +} + +// updateZoektGitConfig applies zoekt.* settings to an existing clone. +// It returns whether the repository config changed. +func updateZoektGitConfig(repoDest string, settings map[string]string) (bool, error) { + changed := false + for _, key := range sortedKeys(settings) { + updated, err := syncGitConfigOption(repoDest, key, settings[key]) + if err != nil { + return false, err + } + changed = changed || updated + } + return changed, nil +} + +func syncGitConfigOption(repoDest, key, value string) (bool, error) { + current, ok, err := repoConfigValue(repoDest, key) + if err != nil { + return false, err + } + + if value == "" { + if !ok { + return false, nil + } + if err := unsetRepoConfigValue(repoDest, key); err != nil { + return false, err + } + return true, nil + } + + if ok && current == value { + return false, nil + } + if err := setRepoConfigValue(repoDest, key, value); err != nil { + return false, err + } + return true, nil +} + +func repoConfigValue(repoDest, key string) (string, bool, error) { + cmd := exec.Command("git", "-C", repoDest, "config", "--get", key) + var out bytes.Buffer + cmd.Stdout = &out + if err := cmd.Run(); err == nil { + return strings.TrimSuffix(out.String(), "\n"), true, nil + } else { + var exitErr *exec.ExitError + if 
errors.As(err, &exitErr) && exitErr.ExitCode() == 1 { + return "", false, nil + } + return "", false, fmt.Errorf("git config --get %q: %w", key, err) + } +} + +func setRepoConfigValue(repoDest, key, value string) error { + if err := exec.Command("git", "-C", repoDest, "config", "--replace-all", key, value).Run(); err != nil { + return fmt.Errorf("git config --replace-all %q: %w", key, err) + } + return nil +} + +func unsetRepoConfigValue(repoDest, key string) error { + if err := exec.Command("git", "-C", repoDest, "config", "--unset-all", key).Run(); err != nil { + return fmt.Errorf("git config --unset-all %q: %w", key, err) + } + return nil +} diff --git a/gitindex/clone_test.go b/gitindex/clone_test.go index f1aebca67..02ed68b7f 100644 --- a/gitindex/clone_test.go +++ b/gitindex/clone_test.go @@ -22,6 +22,8 @@ import ( ) func TestSetRemote(t *testing.T) { + t.Parallel() + dir := t.TempDir() script := `mkdir orig diff --git a/gitindex/delete_test.go b/gitindex/delete_test.go index 51c151e31..ebc9c2152 100644 --- a/gitindex/delete_test.go +++ b/gitindex/delete_test.go @@ -8,6 +8,8 @@ import ( ) func TestDeleteRepos(t *testing.T) { + t.Parallel() + dir := t.TempDir() if err := createSubmoduleRepo(dir); err != nil { diff --git a/gitindex/ignore_test.go b/gitindex/ignore_test.go index 862507753..b48324439 100644 --- a/gitindex/ignore_test.go +++ b/gitindex/ignore_test.go @@ -9,11 +9,10 @@ import ( "reflect" "testing" - "github.com/sourcegraph/zoekt/query" - "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" - "github.com/sourcegraph/zoekt/shards" + "github.com/sourcegraph/zoekt/index" + "github.com/sourcegraph/zoekt/query" + "github.com/sourcegraph/zoekt/search" ) func createSourcegraphignoreRepo(dir string) error { @@ -49,6 +48,8 @@ git update-ref refs/meta/config HEAD } func TestIgnore(t *testing.T) { + t.Parallel() + dir := t.TempDir() if err := createSourcegraphignoreRepo(dir); err != nil { @@ -57,7 +58,7 @@ func TestIgnore(t *testing.T) { 
indexDir := t.TempDir() - buildOpts := build.Options{ + buildOpts := index.Options{ IndexDir: indexDir, RepositoryDescription: zoekt.Repository{ Name: "repo", @@ -77,7 +78,7 @@ func TestIgnore(t *testing.T) { t.Fatalf("IndexGitRepo: %v", err) } - searcher, err := shards.NewDirectorySearcher(indexDir) + searcher, err := search.NewDirectorySearcher(indexDir) if err != nil { t.Fatal("NewDirectorySearcher", err) } diff --git a/gitindex/index.go b/gitindex/index.go index 5bb499c63..5cdd24641 100644 --- a/gitindex/index.go +++ b/gitindex/index.go @@ -33,15 +33,15 @@ import ( "strings" "github.com/go-git/go-billy/v5/osfs" + "github.com/go-git/go-git/v5/config" + "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/cache" + "github.com/go-git/go-git/v5/plumbing/object" "github.com/go-git/go-git/v5/storage/filesystem" + "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" "github.com/sourcegraph/zoekt/ignore" - - "github.com/go-git/go-git/v5/config" - "github.com/go-git/go-git/v5/plumbing" - "github.com/go-git/go-git/v5/plumbing/object" + "github.com/sourcegraph/zoekt/index" git "github.com/go-git/go-git/v5" ) @@ -194,6 +194,23 @@ func getCommit(repo *git.Repository, prefix, ref string) (*object.Commit, error) return commitObj, nil } +func plainOpenRepo(repoDir string) (*git.Repository, error) { + // Try repoDir as the repository root first so bare repositories open + // correctly. If repoDir itself is not a repository, fall back to searching + // for a .git entry to preserve compatibility with worktree paths. 
+ repo, err := git.PlainOpenWithOptions(repoDir, &git.PlainOpenOptions{ + EnableDotGitCommonDir: true, + }) + if err == nil || !errors.Is(err, git.ErrRepositoryNotExists) { + return repo, err + } + + return git.PlainOpenWithOptions(repoDir, &git.PlainOpenOptions{ + DetectDotGit: true, + EnableDotGitCommonDir: true, + }) +} + func configLookupRemoteURL(cfg *config.Config, key string) string { rc := cfg.Remotes[key] if rc == nil || len(rc.URLs) == 0 { @@ -205,7 +222,7 @@ func configLookupRemoteURL(cfg *config.Config, key string) string { var sshRelativeURLRegexp = regexp.MustCompile(`^([^@]+)@([^:]+):(.*)$`) func setTemplatesFromConfig(desc *zoekt.Repository, repoDir string) error { - repo, err := git.PlainOpen(repoDir) + repo, err := plainOpenRepo(repoDir) if err != nil { return err } @@ -215,6 +232,19 @@ func setTemplatesFromConfig(desc *zoekt.Repository, repoDir string) error { return err } + return setTemplatesFromRepoConfig(desc, cfg) +} + +func setTemplatesFromRepo(desc *zoekt.Repository, repo *git.Repository, repoDir string) error { + cfg, err := repo.Config() + if err == nil { + return setTemplatesFromRepoConfig(desc, cfg) + } + + return setTemplatesFromConfig(desc, repoDir) +} + +func setTemplatesFromRepoConfig(desc *zoekt.Repository, cfg *config.Config) error { sec := cfg.Raw.Section("zoekt") webURLStr := sec.Options.Get("web-url") @@ -263,6 +293,8 @@ func setTemplatesFromConfig(desc *zoekt.Repository, repoDir string) error { desc.ID = uint32(id) } + desc.TenantID, _ = strconv.Atoi(sec.Options.Get("tenantID")) + if desc.RawConfig == nil { desc.RawConfig = map[string]string{} } @@ -295,6 +327,44 @@ func setTemplatesFromConfig(desc *zoekt.Repository, repoDir string) error { return nil } +// This attempts to get a repo URL similar to the main repository template processing as in setTemplatesFromConfig() +func normalizeSubmoduleRemoteURL(cfg *config.Config) (string, error) { + sec := cfg.Raw.Section("zoekt") + remoteURL := sec.Options.Get("web-url") + if 
remoteURL == "" { + // fall back to "origin" remote + remoteURL = configLookupRemoteURL(cfg, "origin") + if remoteURL == "" { + return "", nil + } + } + + if sm := sshRelativeURLRegexp.FindStringSubmatch(remoteURL); sm != nil { + user := sm[1] + host := sm[2] + path := sm[3] + + remoteURL = fmt.Sprintf("ssh+git://%s@%s/%s", user, host, path) + } + + u, err := url.Parse(remoteURL) + if err != nil { + return "", fmt.Errorf("unable to parse remote URL %q: %w", remoteURL, err) + } + + if u.Scheme == "ssh+git" { + u.Scheme = "https" + u.User = nil + } + + // Assume we cannot build templates for this URL, leave it empty + if u.Scheme == "" { + return "", nil + } + + return u.String(), nil +} + // SetTemplatesFromOrigin fills in templates based on the origin URL. func SetTemplatesFromOrigin(desc *zoekt.Repository, u *url.URL) error { desc.Name = filepath.Join(u.Host, strings.TrimSuffix(u.Path, ".git")) @@ -328,7 +398,7 @@ type Options struct { RepoCacheDir string // Indexing options. - BuildOptions build.Options + BuildOptions index.Options // Prefix of the branch to index, e.g. `remotes/origin`. BranchPrefix string @@ -424,13 +494,11 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) { opts.BuildOptions.RepositoryDescription.Source = opts.RepoDir var repo *git.Repository - // TODO: this now defaults to on since we found a bug in it. Once we have - // fixed openRepo default to false. 
- legacyRepoOpen := cmp.Or(os.Getenv("ZOEKT_DISABLE_GOGIT_OPTIMIZATION"), "true") + legacyRepoOpen := cmp.Or(os.Getenv("ZOEKT_DISABLE_GOGIT_OPTIMIZATION"), "false") if b, err := strconv.ParseBool(legacyRepoOpen); b || err != nil { - repo, err = git.PlainOpen(opts.RepoDir) + repo, err = plainOpenRepo(opts.RepoDir) if err != nil { - return false, fmt.Errorf("git.PlainOpen: %w", err) + return false, fmt.Errorf("plainOpenRepo: %w", err) } } else { var repoCloser io.Closer @@ -441,8 +509,8 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) { defer repoCloser.Close() } - if err := setTemplatesFromConfig(&opts.BuildOptions.RepositoryDescription, opts.RepoDir); err != nil { - log.Printf("setTemplatesFromConfig(%s): %s", opts.RepoDir, err) + if err := setTemplatesFromRepo(&opts.BuildOptions.RepositoryDescription, repo, opts.RepoDir); err != nil { + log.Printf("setTemplatesFromRepo(%s): %s", opts.RepoDir, err) } branches, err := expandBranches(repo, opts.Branches, opts.BranchPrefix) @@ -471,7 +539,7 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) { if opts.Incremental { state, _ := opts.BuildOptions.IndexState() - if state == build.IndexStateEqual { + if state == index.IndexStateEqual { log.Printf("incremental: skipping repository=%q reason=%s", opts.BuildOptions.RepositoryDescription.Name, state) return false, nil } @@ -515,8 +583,13 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) { tpl := opts.BuildOptions.RepositoryDescription if path != "" { tpl = zoekt.Repository{URL: info.URL.String()} - if err := SetTemplatesFromOrigin(&tpl, info.URL); err != nil { - log.Printf("setTemplatesFromOrigin(%s, %s): %s", path, info.URL, err) + if info.URL.String() != "" { + if err := SetTemplatesFromOrigin(&tpl, info.URL); err != nil { + log.Printf("setTemplatesFromOrigin(%s, %s): %s", path, info.URL, err) + } + } + if tpl.Name == "" { + tpl.Name = path } } opts.BuildOptions.SubRepositories[path] = &tpl @@ -532,7 +605,7 @@ 
func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) { } } - builder, err := build.NewBuilder(opts.BuildOptions) + builder, err := index.NewBuilder(opts.BuildOptions) if err != nil { return false, fmt.Errorf("build.NewBuilder: %w", err) } @@ -562,28 +635,129 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) { sort.Strings(names) names = uniq(names) - log.Printf("attempting to index %d total files", totalFiles) - for idx, name := range names { - keys := fileKeys[name] - - for _, key := range keys { - doc, err := createDocument(key, repos, opts.BuildOptions) - if err != nil { - return false, err + // Separate main-repo keys from submodule keys, collecting blob SHAs + // for the main repo so we can stream them via git cat-file --batch. + // ZOEKT_DISABLE_CATFILE_BATCH=true falls back to the go-git path for + // all files, useful as a kill switch if the cat-file path causes issues. + // + // 2026-04-02(keegan) we are regularly seeing git growing to over 9GB in + // memory usage in our production cluster. Disabling by default until the + // issue is resolved. 
+ catfileBatchDisabled := cmp.Or(os.Getenv("ZOEKT_DISABLE_CATFILE_BATCH"), "true") + useCatfileBatch := true + if disabled, _ := strconv.ParseBool(catfileBatchDisabled); disabled { + useCatfileBatch = false + log.Printf("cat-file batch disabled via ZOEKT_DISABLE_CATFILE_BATCH, using go-git") + } + + mainRepoKeys := make([]fileKey, 0, totalFiles) + mainRepoIDs := make([]plumbing.Hash, 0, totalFiles) + var submoduleKeys []fileKey + + for _, name := range names { + for _, key := range fileKeys[name] { + if useCatfileBatch && key.SubRepoPath == "" { + mainRepoKeys = append(mainRepoKeys, key) + mainRepoIDs = append(mainRepoIDs, key.ID) + } else { + submoduleKeys = append(submoduleKeys, key) } + } + } - if err := builder.Add(doc); err != nil { - return false, fmt.Errorf("error adding document with name %s: %w", key.FullPath(), err) - } + log.Printf("attempting to index %d total files (%d via cat-file, %d via go-git)", totalFiles, len(mainRepoIDs), len(submoduleKeys)) - if idx%10_000 == 0 { - builder.CheckMemoryUsage() - } + // Stream main-repo blobs via pipelined cat-file --batch --buffer. + // Large blobs are skipped without reading content into memory. + if len(mainRepoIDs) > 0 { + crOpts := catfileReaderOptions{ + filterSpec: catfileFilterSpec(opts), + } + cr, err := newCatfileReader(opts.RepoDir, mainRepoIDs, crOpts) + if err != nil { + return false, fmt.Errorf("newCatfileReader: %w", err) + } + + if err := indexCatfileBlobs(cr, mainRepoKeys, repos, opts, builder); err != nil { + return false, err } } + + // Index submodule blobs via go-git. 
+ for idx, key := range submoduleKeys { + doc, err := createDocument(key, repos, opts.BuildOptions) + if err != nil { + return false, err + } + + if err := builder.Add(doc); err != nil { + return false, fmt.Errorf("error adding document with name %s: %w", key.FullPath(), err) + } + + if idx%10_000 == 0 { + builder.CheckMemoryUsage() + } + } + return true, builder.Finish() } +// indexCatfileBlobs streams main-repo blobs from the catfileReader into the +// builder. Large blobs are skipped without reading content into memory. +// keys must correspond 1:1 (in order) with the ids passed to newCatfileReader. +// The reader is always closed when this function returns. +func indexCatfileBlobs(cr *catfileReader, keys []fileKey, repos map[fileKey]BlobLocation, opts Options, builder *index.Builder) error { + defer cr.Close() + + slab := newContentSlab(16 << 20) // 16 MB per slab + + for idx, key := range keys { + size, missing, excluded, err := cr.Next() + if err != nil { + return fmt.Errorf("cat-file next for %s: %w", key.FullPath(), err) + } + + branches := repos[key].Branches + var doc index.Document + + if missing { + // Unexpected for local repos — may indicate corruption, shallow + // clone, or a race with git gc. Log a warning and skip. + log.Printf("warning: blob %s missing for %s", key.ID, key.FullPath()) + doc = skippedDoc(key, branches, index.SkipReasonMissing) + } else if excluded { + doc = skippedDoc(key, branches, index.SkipReasonTooLarge) + } else { + keyFullPath := key.FullPath() + if size > opts.BuildOptions.SizeMax && !opts.BuildOptions.IgnoreSizeMax(keyFullPath) { + // Skip without reading content into memory. 
+ doc = skippedDoc(key, branches, index.SkipReasonTooLarge) + } else { + content := slab.alloc(size) + if _, err := io.ReadFull(cr, content); err != nil { + return fmt.Errorf("read blob %s: %w", keyFullPath, err) + } + doc = index.Document{ + SubRepositoryPath: key.SubRepoPath, + Name: keyFullPath, + Content: content, + Branches: branches, + } + } + } + + if err := builder.Add(doc); err != nil { + return fmt.Errorf("error adding document with name %s: %w", key.FullPath(), err) + } + + if idx%10_000 == 0 { + builder.CheckMemoryUsage() + } + } + + return nil +} + // openRepo opens a git repository in a way that's optimized for indexing. // // It copies the relevant logic from git.PlainOpen, and tweaks certain filesystem options. @@ -598,6 +772,27 @@ func openRepo(repoDir string) (*git.Repository, io.Closer, error) { return nil, nil, err } + fi, err := fs.Stat(git.GitDirName) + if err == nil && !fi.IsDir() { + return openCompatibleRepo(repoDir) + } + + return openOptimizedRepo(repoDir) +} + +func openCompatibleRepo(repoDir string) (*git.Repository, io.Closer, error) { + repo, err := plainOpenRepo(repoDir) + if err != nil { + return nil, nil, err + } + + return repo, noopCloser{}, nil +} + +func openOptimizedRepo(repoDir string) (*git.Repository, io.Closer, error) { + fs := osfs.New(repoDir) + wt := fs + // If there's a .git directory, use that as the new root. if fi, err := fs.Stat(git.GitDirName); err == nil && fi.IsDir() { if fs, err = fs.Chroot(git.GitDirName); err != nil { @@ -608,34 +803,30 @@ func openRepo(repoDir string) (*git.Repository, io.Closer, error) { s := filesystem.NewStorageWithOptions(fs, cache.NewObjectLRUDefault(), filesystem.Options{ // Cache the packfile handles, preventing the packfile from being opened then closed on every object access KeepDescriptors: true, - // Disable caching for most objects, by setting the threshold to 1 byte. 
This avoids allocating a bunch of - // in-memory objects that are unlikely to be reused, since we only read each file once. Note: go-git still - // proactively caches objects under 16KB (see smallObjectThreshold in packfile logic). - LargeObjectThreshold: 1, }) // Because we're keeping descriptors open, we need to close the storage object when we're done. - repo, err := git.Open(s, fs) + repo, err := git.Open(s, wt) return repo, s, err } -type repoPathRanks struct { - MeanRank float64 `json:"mean_reference_count"` - Paths map[string]float64 `json:"paths"` -} +type noopCloser struct{} -// rank returns the rank for a given path. It uses these rules: -// - If we have a concrete rank for this file, always use it -// - If there's no rank, and it's a low priority file like a test, then use rank 0 -// - Otherwise use the mean rank of this repository, to avoid giving it a big disadvantage -func (r repoPathRanks) rank(path string, content []byte) float64 { - if rank, ok := r.Paths[path]; ok { - return rank - } else if build.IsLowPriority(path, content) { - return 0.0 - } else { - return r.MeanRank +func (noopCloser) Close() error { return nil } + +func catfileFilterSpec(opts Options) string { + // Can't filter by size if we have large file exceptions + if len(opts.BuildOptions.LargeFiles) > 0 { + return "" + } + + if opts.BuildOptions.SizeMax <= 0 { + return "" } + + // Git's blob:limit filter excludes blobs whose size is >= the given limit, + // while zoekt indexes files up to and including SizeMax bytes. + return fmt.Sprintf("blob:limit=%d", int64(opts.BuildOptions.SizeMax)+1) } func newIgnoreMatcher(tree *object.Tree) (*ignore.Matcher, error) { @@ -709,7 +900,7 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f // If it isn't consistent, that we can't proceed with a delta build (and the caller should fall back to a // normal one). 
- if !build.BranchNamesEqual(existingRepository.Branches, options.BuildOptions.RepositoryDescription.Branches) { + if !index.BranchNamesEqual(existingRepository.Branches, options.BuildOptions.RepositoryDescription.Branches) { var existingBranchNames []string for _, b := range existingRepository.Branches { existingBranchNames = append(existingBranchNames, b.Name) @@ -727,7 +918,7 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f } // Check if the build options hash does not match the repository metadata's hash - // If it does not match then one or more index options has changed and will require a normal build instead of a delta build + // If it does not match then one or more index options has changed and will require a normal build instead of a delta build if options.BuildOptions.GetHash() != existingRepository.IndexOptions { return nil, nil, nil, fmt.Errorf("one or more index options previously stored for repository %s (ID: %d) does not match the index options for this requested build; These index option updates are incompatible with delta build. new index options: %+v", existingRepository.Name, existingRepository.ID, options.BuildOptions.HashOptions()) } @@ -735,10 +926,15 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f // branch => (path, sha1) => repo. 
repos = map[fileKey]BlobLocation{} + branches, err := expandBranches(repository, options.Branches, options.BranchPrefix) + if err != nil { + return nil, nil, nil, fmt.Errorf("expandBranches: %w", err) + } + // branch name -> git worktree at most current commit - branchToCurrentTree := make(map[string]*object.Tree, len(options.Branches)) + branchToCurrentTree := make(map[string]*object.Tree, len(branches)) - for _, b := range options.Branches { + for _, b := range branches { commit, err := getCommit(repository, options.BranchPrefix, b) if err != nil { return nil, nil, nil, fmt.Errorf("getting last current commit for branch %q: %w", b, err) @@ -868,10 +1064,13 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f func prepareNormalBuild(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchVersions map[string]map[string]plumbing.Hash, err error) { var repoCache *RepoCache - if options.Submodules { + if options.Submodules && options.RepoCacheDir != "" { repoCache = NewRepoCache(options.RepoCacheDir) } + return prepareNormalBuildRecurse(options, repository, repoCache, false) +} +func prepareNormalBuildRecurse(options Options, repository *git.Repository, repoCache *RepoCache, isSubrepo bool) (repos map[fileKey]BlobLocation, branchVersions map[string]map[string]plumbing.Hash, err error) { // Branch => Repo => SHA1 branchVersions = map[string]map[string]plumbing.Hash{} @@ -880,7 +1079,22 @@ func prepareNormalBuild(options Options, repository *git.Repository) (repos map[ return nil, nil, fmt.Errorf("expandBranches: %w", err) } - rw := NewRepoWalker(repository, options.BuildOptions.RepositoryDescription.URL, repoCache) + repoURL := options.BuildOptions.RepositoryDescription.URL + + if isSubrepo { + cfg, err := repository.Config() + if err != nil { + return nil, nil, fmt.Errorf("unable to get repository config: %w", err) + } + + u, err := normalizeSubmoduleRemoteURL(cfg) + if err != nil { + return nil, nil, 
fmt.Errorf("failed to identify subrepository URL: %w", err) + } + repoURL = u + } + + rw := NewRepoWalker(repository, repoURL, repoCache) for _, b := range branches { commit, err := getCommit(repository, options.BranchPrefix, b) if err != nil { @@ -909,37 +1123,78 @@ func prepareNormalBuild(options Options, repository *git.Repository) (repos map[ branchVersions[b] = subVersions } + // Index submodules using go-git if we didn't do so using the repo cache + if options.Submodules && options.RepoCacheDir == "" { + worktree, err := repository.Worktree() + if err != nil { + return nil, nil, fmt.Errorf("failed to get repository worktree: %w", err) + } + + submodules, err := worktree.Submodules() + if err != nil { + return nil, nil, fmt.Errorf("failed to get submodules: %w", err) + } + + for _, submodule := range submodules { + subRepository, err := submodule.Repository() + if err != nil { + log.Printf("failed to open submodule repository: %s, %s", submodule.Config().Name, err) + continue + } + + sw, subVersions, err := prepareNormalBuildRecurse(options, subRepository, repoCache, true) + if err != nil { + log.Printf("failed to index submodule repository: %s, %s", submodule.Config().Name, err) + continue + } + + log.Printf("adding subrepository files from: %s", submodule.Config().Name) + + for k, repo := range sw { + rw.Files[fileKey{ + SubRepoPath: filepath.Join(submodule.Config().Path, k.SubRepoPath), + Path: k.Path, + ID: k.ID, + }] = repo + } + + for k, v := range subVersions { + branchVersions[filepath.Join(submodule.Config().Path, k)] = v + } + } + } + return rw.Files, branchVersions, nil } func createDocument(key fileKey, repos map[fileKey]BlobLocation, - opts build.Options, -) (zoekt.Document, error) { + opts index.Options, +) (index.Document, error) { repo := repos[key] blob, err := repo.GitRepo.BlobObject(key.ID) branches := repos[key].Branches // We filter out large documents when fetching the repo. So if an object is too large, it will not be found. 
if errors.Is(err, plumbing.ErrObjectNotFound) { - return skippedLargeDoc(key, branches, opts), nil + return skippedDoc(key, branches, index.SkipReasonTooLarge), nil } if err != nil { - return zoekt.Document{}, err + return index.Document{}, err } keyFullPath := key.FullPath() if blob.Size > int64(opts.SizeMax) && !opts.IgnoreSizeMax(keyFullPath) { - return skippedLargeDoc(key, branches, opts), nil + return skippedDoc(key, branches, index.SkipReasonTooLarge), nil } contents, err := blobContents(blob) if err != nil { - return zoekt.Document{}, err + return index.Document{}, err } - return zoekt.Document{ + return index.Document{ SubRepositoryPath: key.SubRepoPath, Name: keyFullPath, Content: contents, @@ -947,9 +1202,10 @@ func createDocument(key fileKey, }, nil } -func skippedLargeDoc(key fileKey, branches []string, opts build.Options) zoekt.Document { - return zoekt.Document{ - SkipReason: fmt.Sprintf("file size exceeds maximum size %d", opts.SizeMax), +// skippedDoc creates a Document placeholder for a blob that was not indexed. 
+func skippedDoc(key fileKey, branches []string, reason index.SkipReason) index.Document { + return index.Document{ + SkipReason: reason, Name: key.FullPath(), Branches: branches, SubRepositoryPath: key.SubRepoPath, diff --git a/gitindex/index_test.go b/gitindex/index_test.go index 340983623..d8d303751 100644 --- a/gitindex/index_test.go +++ b/gitindex/index_test.go @@ -18,7 +18,6 @@ import ( "bytes" "context" "errors" - "fmt" "net/url" "os" "os/exec" @@ -32,14 +31,17 @@ import ( "github.com/go-git/go-git/v5/plumbing" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" "github.com/sourcegraph/zoekt/ignore" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/query" - "github.com/sourcegraph/zoekt/shards" + "github.com/sourcegraph/zoekt/search" ) func TestIndexEmptyRepo(t *testing.T) { + t.Parallel() + dir := t.TempDir() cmd := exec.Command("git", "init", "-b", "master", "repo") @@ -54,7 +56,7 @@ func TestIndexEmptyRepo(t *testing.T) { } opts := Options{ RepoDir: filepath.Join(dir, "repo", ".git"), - BuildOptions: build.Options{ + BuildOptions: index.Options{ RepositoryDescription: desc, IndexDir: dir, }, @@ -66,6 +68,8 @@ func TestIndexEmptyRepo(t *testing.T) { } func TestIndexNonexistentRepo(t *testing.T) { + t.Parallel() + dir := t.TempDir() desc := zoekt.Repository{ Name: "nonexistent", @@ -73,7 +77,7 @@ func TestIndexNonexistentRepo(t *testing.T) { opts := Options{ RepoDir: "does/not/exist", Branches: []string{"main"}, - BuildOptions: build.Options{ + BuildOptions: index.Options{ RepositoryDescription: desc, IndexDir: dir, }, @@ -87,26 +91,25 @@ func TestIndexNonexistentRepo(t *testing.T) { } func TestIndexTinyRepo(t *testing.T) { + t.Parallel() + // Create a repo with one file in it. 
dir := t.TempDir() - executeCommand(t, dir, exec.Command("git", "init", "-b", "main", "repo")) + runGit(t, dir, "init", "-b", "main", "repo") repoDir := filepath.Join(dir, "repo") - executeCommand(t, repoDir, exec.Command("git", "config", "user.name", "Thomas")) - executeCommand(t, repoDir, exec.Command("git", "config", "user.email", "thomas@google.com")) - if err := os.WriteFile(filepath.Join(repoDir, "file1.go"), []byte("package main\n\nfunc main() {}\n"), 0644); err != nil { t.Fatalf("WriteFile: %v", err) } - executeCommand(t, repoDir, exec.Command("git", "add", ".")) - executeCommand(t, repoDir, exec.Command("git", "commit", "-m", "initial commit")) + runGit(t, repoDir, "add", ".") + runGit(t, repoDir, "commit", "-m", "initial commit") // Test that indexing accepts both the repo directory, and the .git subdirectory. for _, testDir := range []string{"repo", "repo/.git"} { opts := Options{ RepoDir: filepath.Join(dir, testDir), Branches: []string{"main"}, - BuildOptions: build.Options{ + BuildOptions: index.Options{ RepositoryDescription: zoekt.Repository{Name: "repo"}, IndexDir: dir, }, @@ -116,7 +119,7 @@ func TestIndexTinyRepo(t *testing.T) { t.Fatalf("unexpected error %v", err) } - searcher, err := shards.NewDirectorySearcher(dir) + searcher, err := search.NewDirectorySearcher(dir) if err != nil { t.Fatal("NewDirectorySearcher", err) } @@ -134,16 +137,215 @@ func TestIndexTinyRepo(t *testing.T) { } } -func executeCommand(t *testing.T, dir string, cmd *exec.Cmd) *exec.Cmd { - cmd.Dir = dir - if err := cmd.Run(); err != nil { - t.Fatalf("cmd.Run: %v", err) +func TestIndexGitRepo_Worktree(t *testing.T) { + t.Parallel() + + _, worktreeDir := initGitWorktree(t, "file1.go", "package main\n\nfunc main() {}\n") + indexDir := t.TempDir() + + opts := Options{ + RepoDir: worktreeDir, + Branches: []string{"HEAD"}, + BuildOptions: index.Options{ + RepositoryDescription: zoekt.Repository{Name: "repo"}, + IndexDir: indexDir, + }, + } + + if _, err := IndexGitRepo(opts); err 
!= nil { + t.Fatalf("IndexGitRepo(worktree): %v", err) + } + + searcher, err := search.NewDirectorySearcher(indexDir) + if err != nil { + t.Fatal("NewDirectorySearcher", err) + } + defer searcher.Close() + + results, err := searcher.Search(context.Background(), &query.Const{Value: true}, &zoekt.SearchOptions{}) + if err != nil { + t.Fatal("search failed", err) + } + + if len(results.Files) != 1 { + t.Fatalf("got search result %v, want 1 file", results.Files) + } +} + +func TestOpenRepoVariants(t *testing.T) { + t.Parallel() + + repoDir, worktreeDir := initGitWorktree(t, "file1.go", "package main\n\nfunc main() {}\n") + bareDir := cloneBareRepo(t, repoDir) + + paths := []struct { + name string + path string + }{ + {name: "repo root", path: repoDir}, + {name: "dot git dir", path: filepath.Join(repoDir, ".git")}, + {name: "worktree root", path: worktreeDir}, + {name: "bare repo root", path: bareDir}, + } + + openers := []struct { + name string + open func(t *testing.T, repoDir string) *git.Repository + }{ + { + name: "plain", + open: func(t *testing.T, repoDir string) *git.Repository { + t.Helper() + + repo, err := plainOpenRepo(repoDir) + if err != nil { + t.Fatalf("plainOpenRepo(%q): %v", repoDir, err) + } + + return repo + }, + }, + { + name: "optimized", + open: func(t *testing.T, repoDir string) *git.Repository { + t.Helper() + + repo, closer, err := openRepo(repoDir) + if err != nil { + t.Fatalf("openRepo(%q): %v", repoDir, err) + } + t.Cleanup(func() { + _ = closer.Close() + }) + + return repo + }, + }, + } + + for _, opener := range openers { + for _, tc := range paths { + t.Run(opener.name+"/"+tc.name, func(t *testing.T) { + t.Parallel() + + repo := opener.open(t, tc.path) + + head, err := repo.Head() + if err != nil { + t.Fatalf("repo.Head(): %v", err) + } + + if _, err := repo.CommitObject(head.Hash()); err != nil { + t.Fatalf("repo.CommitObject(%s): %v", head.Hash(), err) + } + }) + } } - return cmd +} + +func TestIndexGitRepo_BareRepo_LegacyRepoOpen(t 
*testing.T) { + repoDir, _ := initGitWorktree(t, "file1.go", "package main\n\nfunc main() {}\n") + bareDir := cloneBareRepo(t, repoDir) + indexDir := t.TempDir() + + t.Setenv("ZOEKT_DISABLE_GOGIT_OPTIMIZATION", "true") + + opts := Options{ + RepoDir: bareDir, + Branches: []string{"main"}, + BuildOptions: index.Options{ + RepositoryDescription: zoekt.Repository{Name: "repo"}, + IndexDir: indexDir, + }, + } + + if _, err := IndexGitRepo(opts); err != nil { + t.Fatalf("IndexGitRepo(bare, legacy open): %v", err) + } + + searcher, err := search.NewDirectorySearcher(indexDir) + if err != nil { + t.Fatal("NewDirectorySearcher", err) + } + defer searcher.Close() + + results, err := searcher.Search(context.Background(), &query.Const{Value: true}, &zoekt.SearchOptions{}) + if err != nil { + t.Fatal("search failed", err) + } + + if len(results.Files) != 1 || results.Files[0].FileName != "file1.go" { + t.Fatalf("got search result %v, want file1.go", results.Files) + } +} + +func TestCatfileFilterSpec(t *testing.T) { + t.Parallel() + + for _, tc := range []struct { + name string + opts Options + want string + }{ + { + name: "size max", + opts: Options{BuildOptions: index.Options{SizeMax: 1 << 20}}, + want: "blob:limit=1048577", + }, + { + name: "large file exception disables filter", + opts: Options{BuildOptions: index.Options{SizeMax: 1 << 20, LargeFiles: []string{"*.bin"}}}, + want: "", + }, + { + name: "zero size max disables filter", + opts: Options{BuildOptions: index.Options{SizeMax: 0}}, + want: "", + }, + } { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + if got := catfileFilterSpec(tc.opts); got != tc.want { + t.Fatalf("catfileFilterSpec() = %q, want %q", got, tc.want) + } + }) + } +} + +func initGitWorktree(t *testing.T, fileName, content string) (string, string) { + t.Helper() + + dir := t.TempDir() + runGit(t, dir, "init", "-b", "main", "repo") + + repoDir := filepath.Join(dir, "repo") + if err := os.WriteFile(filepath.Join(repoDir, fileName), 
[]byte(content), 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + runGit(t, repoDir, "config", "remote.origin.url", "git@github.com:sourcegraph/zoekt.git") + runGit(t, repoDir, "add", ".") + runGit(t, repoDir, "commit", "-m", "initial commit") + + worktreeDir := filepath.Join(dir, "wt") + runGit(t, repoDir, "worktree", "add", "-b", "worktree-branch", worktreeDir) + + return repoDir, worktreeDir +} + +func cloneBareRepo(t *testing.T, repoDir string) string { + t.Helper() + + bareDir := filepath.Join(t.TempDir(), "repo.git") + runGit(t, filepath.Dir(repoDir), "clone", "--bare", repoDir, bareDir) + + return bareDir } func TestIndexDeltaBasic(t *testing.T) { - type branchToDocumentMap map[string][]zoekt.Document + t.Parallel() + + type branchToDocumentMap map[string][]index.Document type step struct { name string @@ -152,25 +354,25 @@ func TestIndexDeltaBasic(t *testing.T) { optFn func(t *testing.T, options *Options) expectedFallbackToNormalBuild bool - expectedDocuments []zoekt.Document + expectedDocuments []index.Document } - helloWorld := zoekt.Document{Name: "hello_world.txt", Content: []byte("hello")} + helloWorld := index.Document{Name: "hello_world.txt", Content: []byte("hello")} - fruitV1 := zoekt.Document{Name: "best_fruit.txt", Content: []byte("strawberry")} - fruitV1InFolder := zoekt.Document{Name: "the_best/best_fruit.txt", Content: fruitV1.Content} - fruitV1WithNewName := zoekt.Document{Name: "new_fruit.txt", Content: fruitV1.Content} + fruitV1 := index.Document{Name: "best_fruit.txt", Content: []byte("strawberry")} + fruitV1InFolder := index.Document{Name: "the_best/best_fruit.txt", Content: fruitV1.Content} + fruitV1WithNewName := index.Document{Name: "new_fruit.txt", Content: fruitV1.Content} - fruitV2 := zoekt.Document{Name: "best_fruit.txt", Content: []byte("grapes")} - fruitV2InFolder := zoekt.Document{Name: "the_best/best_fruit.txt", Content: fruitV2.Content} + fruitV2 := index.Document{Name: "best_fruit.txt", Content: []byte("grapes")} + 
fruitV2InFolder := index.Document{Name: "the_best/best_fruit.txt", Content: fruitV2.Content} - fruitV3 := zoekt.Document{Name: "best_fruit.txt", Content: []byte("oranges")} - fruitV4 := zoekt.Document{Name: "best_fruit.txt", Content: []byte("apples")} + fruitV3 := index.Document{Name: "best_fruit.txt", Content: []byte("oranges")} + fruitV4 := index.Document{Name: "best_fruit.txt", Content: []byte("apples")} - foo := zoekt.Document{Name: "foo.txt", Content: []byte("bar")} + foo := index.Document{Name: "foo.txt", Content: []byte("bar")} - emptySourcegraphIgnore := zoekt.Document{Name: ignore.IgnoreFile} - sourcegraphIgnoreWithContent := zoekt.Document{Name: ignore.IgnoreFile, Content: []byte("good_content.txt")} + emptySourcegraphIgnore := index.Document{Name: ignore.IgnoreFile} + sourcegraphIgnoreWithContent := index.Document{Name: ignore.IgnoreFile, Content: []byte("good_content.txt")} for _, test := range []struct { name string @@ -184,21 +386,21 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{helloWorld, fruitV1}, + "main": []index.Document{helloWorld, fruitV1}, }, - expectedDocuments: []zoekt.Document{helloWorld, fruitV1}, + expectedDocuments: []index.Document{helloWorld, fruitV1}, }, { name: "add newer version of fruits", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV2}, + "main": []index.Document{fruitV2}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{helloWorld, fruitV2}, + expectedDocuments: []index.Document{helloWorld, fruitV2}, }, }, }, @@ -209,21 +411,21 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{foo, fruitV1InFolder}, + "main": []index.Document{foo, fruitV1InFolder}, }, - expectedDocuments: []zoekt.Document{foo, fruitV1InFolder}, + expectedDocuments: []index.Document{foo, fruitV1InFolder}, }, { name: "add 
newer version of fruits inside folder", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV2InFolder}, + "main": []index.Document{fruitV2InFolder}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{foo, fruitV2InFolder}, + expectedDocuments: []index.Document{foo, fruitV2InFolder}, }, }, }, @@ -234,21 +436,21 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{helloWorld, fruitV1}, + "main": []index.Document{helloWorld, fruitV1}, }, - expectedDocuments: []zoekt.Document{helloWorld, fruitV1}, + expectedDocuments: []index.Document{helloWorld, fruitV1}, }, { name: "add new file - foo", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{foo}, + "main": []index.Document{foo}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{helloWorld, fruitV1, foo}, + expectedDocuments: []index.Document{helloWorld, fruitV1, foo}, }, }, }, @@ -259,23 +461,23 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{helloWorld, fruitV1, foo}, + "main": []index.Document{helloWorld, fruitV1, foo}, }, - expectedDocuments: []zoekt.Document{helloWorld, fruitV1, foo}, + expectedDocuments: []index.Document{helloWorld, fruitV1, foo}, }, { name: "delete foo file", addedDocuments: nil, deletedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{foo}, + "main": []index.Document{foo}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{helloWorld, fruitV1}, + expectedDocuments: []index.Document{helloWorld, fruitV1}, }, }, }, @@ -286,27 +488,27 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1}, - "release": []zoekt.Document{fruitV2}, - "dev": 
[]zoekt.Document{fruitV3}, + "main": []index.Document{fruitV1}, + "release": []index.Document{fruitV2}, + "dev": []index.Document{fruitV3}, }, - expectedDocuments: []zoekt.Document{fruitV1, fruitV2, fruitV3}, + expectedDocuments: []index.Document{fruitV1, fruitV2, fruitV3}, }, { name: "replace fruits v3 with v4 on 'dev', delete fruits on 'main'", addedDocuments: branchToDocumentMap{ - "dev": []zoekt.Document{fruitV4}, + "dev": []index.Document{fruitV4}, }, deletedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1}, + "main": []index.Document{fruitV1}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{fruitV2, fruitV4}, + expectedDocuments: []index.Document{fruitV2, fruitV4}, }, }, }, @@ -317,25 +519,25 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1}, - "release": []zoekt.Document{fruitV2}, + "main": []index.Document{fruitV1}, + "release": []index.Document{fruitV2}, }, - expectedDocuments: []zoekt.Document{fruitV1, fruitV2}, + expectedDocuments: []index.Document{fruitV1, fruitV2}, }, { name: "rename fruits file on 'main' + ensure that unmodified fruits file on 'release' is still searchable", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1WithNewName}, + "main": []index.Document{fruitV1WithNewName}, }, deletedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1}, + "main": []index.Document{fruitV1}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{fruitV1WithNewName, fruitV2}, + expectedDocuments: []index.Document{fruitV1WithNewName, fruitV2}, }, }, }, @@ -346,23 +548,23 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1}, - "dev": []zoekt.Document{fruitV2}, + "main": []index.Document{fruitV1}, + "dev": 
[]index.Document{fruitV2}, }, - expectedDocuments: []zoekt.Document{fruitV1, fruitV2}, + expectedDocuments: []index.Document{fruitV1, fruitV2}, }, { name: "switch main to dev's older version of fruits + bump dev's fruits to new version", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV2}, - "dev": []zoekt.Document{fruitV3}, + "main": []index.Document{fruitV2}, + "dev": []index.Document{fruitV3}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{fruitV2, fruitV3}, + expectedDocuments: []index.Document{fruitV2, fruitV3}, }, }, }, @@ -373,10 +575,10 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1, foo}, - "dev": []zoekt.Document{helloWorld}, + "main": []index.Document{fruitV1, foo}, + "dev": []index.Document{helloWorld}, }, - expectedDocuments: []zoekt.Document{fruitV1, foo, helloWorld}, + expectedDocuments: []index.Document{fruitV1, foo, helloWorld}, }, { name: "first no-op (normal build -> delta build)", @@ -384,7 +586,7 @@ func TestIndexDeltaBasic(t *testing.T) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{fruitV1, foo, helloWorld}, + expectedDocuments: []index.Document{fruitV1, foo, helloWorld}, }, { name: "second no-op (delta build -> delta build)", @@ -392,7 +594,7 @@ func TestIndexDeltaBasic(t *testing.T) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{fruitV1, foo, helloWorld}, + expectedDocuments: []index.Document{fruitV1, foo, helloWorld}, }, }, }, @@ -403,14 +605,14 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "attempt delta build on a repository that hasn't been indexed yet", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{helloWorld}, + "main": []index.Document{helloWorld}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, expectedFallbackToNormalBuild: true, - expectedDocuments: 
[]zoekt.Document{helloWorld}, + expectedDocuments: []index.Document{helloWorld}, }, }, }, @@ -421,17 +623,17 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1}, - "release": []zoekt.Document{fruitV2}, - "dev": []zoekt.Document{fruitV3}, + "main": []index.Document{fruitV1}, + "release": []index.Document{fruitV2}, + "dev": []index.Document{fruitV3}, }, - expectedDocuments: []zoekt.Document{fruitV1, fruitV2, fruitV3}, + expectedDocuments: []index.Document{fruitV1, fruitV2, fruitV3}, }, { name: "try delta build after dropping 'main' branch from index ", addedDocuments: branchToDocumentMap{ - "release": []zoekt.Document{fruitV4}, + "release": []index.Document{fruitV4}, }, optFn: func(t *testing.T, o *Options) { o.Branches = []string{"HEAD", "release", "dev"} // a bit of a hack to override it this way, but it gets the job done @@ -439,7 +641,32 @@ func TestIndexDeltaBasic(t *testing.T) { }, expectedFallbackToNormalBuild: true, - expectedDocuments: []zoekt.Document{fruitV3, fruitV4}, + expectedDocuments: []index.Document{fruitV3, fruitV4}, + }, + }, + }, + { + name: "should expand branches correctly when using wildcards in branch names", + branches: []string{"release/1", "release/2"}, + steps: []step{ + { + name: "setup", + addedDocuments: branchToDocumentMap{ + "release/1": []index.Document{fruitV1}, + "release/2": []index.Document{fruitV2}, + }, + + expectedDocuments: []index.Document{fruitV1, fruitV2}, + }, + { + name: "try delta build with wildcard in branches", + optFn: func(t *testing.T, o *Options) { + // use a wildcard here + o.Branches = []string{"HEAD", "release/*"} + o.BuildOptions.IsDelta = true + }, + + expectedDocuments: []index.Document{fruitV1, fruitV2}, }, }, }, @@ -450,15 +677,15 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1}, + "main": []index.Document{fruitV1}, }, - expectedDocuments: 
[]zoekt.Document{fruitV1}, + expectedDocuments: []index.Document{fruitV1}, }, { name: "try delta build after updating Disable CTags index option", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV2}, + "main": []index.Document{fruitV2}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true @@ -466,12 +693,12 @@ func TestIndexDeltaBasic(t *testing.T) { }, expectedFallbackToNormalBuild: true, - expectedDocuments: []zoekt.Document{fruitV2}, + expectedDocuments: []index.Document{fruitV2}, }, { name: "try delta build after reverting Disable CTags index option", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV3}, + "main": []index.Document{fruitV3}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true @@ -479,7 +706,7 @@ func TestIndexDeltaBasic(t *testing.T) { }, expectedFallbackToNormalBuild: true, - expectedDocuments: []zoekt.Document{fruitV3}, + expectedDocuments: []index.Document{fruitV3}, }, }, }, @@ -490,15 +717,15 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1}, + "main": []index.Document{fruitV1}, }, - expectedDocuments: []zoekt.Document{fruitV1}, + expectedDocuments: []index.Document{fruitV1}, }, { name: "try delta build after updating Disable CTags index option", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV2}, + "main": []index.Document{fruitV2}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true @@ -506,19 +733,19 @@ func TestIndexDeltaBasic(t *testing.T) { }, expectedFallbackToNormalBuild: true, - expectedDocuments: []zoekt.Document{fruitV2}, + expectedDocuments: []index.Document{fruitV2}, }, { name: "try another delta build while CTags is still disabled", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV3}, + "main": []index.Document{fruitV3}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true 
o.BuildOptions.DisableCTags = true }, - expectedDocuments: []zoekt.Document{fruitV3}, + expectedDocuments: []index.Document{fruitV3}, }, }, }, @@ -529,22 +756,22 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{emptySourcegraphIgnore}, + "main": []index.Document{emptySourcegraphIgnore}, }, - expectedDocuments: []zoekt.Document{emptySourcegraphIgnore}, + expectedDocuments: []index.Document{emptySourcegraphIgnore}, }, { name: "attempt delta build after modifying ignore file", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{sourcegraphIgnoreWithContent}, + "main": []index.Document{sourcegraphIgnoreWithContent}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, expectedFallbackToNormalBuild: true, - expectedDocuments: []zoekt.Document{sourcegraphIgnoreWithContent}, + expectedDocuments: []index.Document{sourcegraphIgnoreWithContent}, }, }, }, @@ -555,37 +782,37 @@ func TestIndexDeltaBasic(t *testing.T) { { name: "setup: first shard", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{foo}, + "main": []index.Document{foo}, }, - expectedDocuments: []zoekt.Document{foo}, + expectedDocuments: []index.Document{foo}, }, { name: "setup: second shard (delta)", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV1}, + "main": []index.Document{fruitV1}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{foo, fruitV1}, + expectedDocuments: []index.Document{foo, fruitV1}, }, { name: "setup: third shard (delta)", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{helloWorld}, + "main": []index.Document{helloWorld}, }, optFn: func(t *testing.T, o *Options) { o.BuildOptions.IsDelta = true }, - expectedDocuments: []zoekt.Document{foo, fruitV1, helloWorld}, + expectedDocuments: []index.Document{foo, fruitV1, helloWorld}, }, { name: "attempt another delta build after 
we already blew past the shard threshold", addedDocuments: branchToDocumentMap{ - "main": []zoekt.Document{fruitV2InFolder}, + "main": []index.Document{fruitV2InFolder}, }, optFn: func(t *testing.T, o *Options) { o.DeltaShardNumberFallbackThreshold = 2 @@ -593,7 +820,7 @@ func TestIndexDeltaBasic(t *testing.T) { }, expectedFallbackToNormalBuild: true, - expectedDocuments: []zoekt.Document{foo, fruitV1, helloWorld, fruitV2InFolder}, + expectedDocuments: []index.Document{foo, fruitV1, helloWorld, fruitV2InFolder}, }, }, }, @@ -607,13 +834,11 @@ func TestIndexDeltaBasic(t *testing.T) { repositoryDir := t.TempDir() // setup: initialize the repository and all of its branches - runScript(t, repositoryDir, "git init -b master") - runScript(t, repositoryDir, fmt.Sprintf("git config user.email %q", "you@example.com")) - runScript(t, repositoryDir, fmt.Sprintf("git config user.name %q", "Your Name")) + runGit(t, repositoryDir, "init", "-b", "master") for _, b := range test.branches { - runScript(t, repositoryDir, fmt.Sprintf("git checkout -b %q", b)) - runScript(t, repositoryDir, fmt.Sprintf("git commit --allow-empty -m %q", "empty commit")) + runGit(t, repositoryDir, "checkout", "-b", b) + runGit(t, repositoryDir, "commit", "--allow-empty", "-m", "empty commit") } for _, step := range test.steps { @@ -623,7 +848,7 @@ func TestIndexDeltaBasic(t *testing.T) { hadChange := false - runScript(t, repositoryDir, fmt.Sprintf("git checkout %q", b)) + runGit(t, repositoryDir, "checkout", b) for _, d := range step.deletedDocuments[b] { hadChange = true @@ -634,8 +859,6 @@ func TestIndexDeltaBasic(t *testing.T) { if err != nil { t.Fatalf("deleting file %q: %s", d.Name, err) } - - runScript(t, repositoryDir, fmt.Sprintf("git add %q", file)) } for _, d := range step.addedDocuments[b] { @@ -652,19 +875,18 @@ func TestIndexDeltaBasic(t *testing.T) { if err != nil { t.Fatalf("writing file %q: %s", d.Name, err) } - - runScript(t, repositoryDir, fmt.Sprintf("git add %q", file)) } if 
!hadChange { continue } - runScript(t, repositoryDir, fmt.Sprintf("git commit -m %q", step.name)) + runGit(t, repositoryDir, "add", "-A") + runGit(t, repositoryDir, "commit", "-m", step.name) } // setup: prepare indexOptions with given overrides - buildOptions := build.Options{ + buildOptions := index.Options{ IndexDir: indexDir, RepositoryDescription: zoekt.Repository{ Name: "repository", @@ -724,7 +946,7 @@ func TestIndexDeltaBasic(t *testing.T) { // // then, compare returned set of documents with the expected set for the step and see if they agree - ss, err := shards.NewDirectorySearcher(indexDir) + ss, err := search.NewDirectorySearcher(indexDir) if err != nil { t.Fatalf("NewDirectorySearcher(%s): %s", indexDir, err) } @@ -736,15 +958,15 @@ func TestIndexDeltaBasic(t *testing.T) { t.Fatalf("Search: %s", err) } - var receivedDocuments []zoekt.Document + var receivedDocuments []index.Document for _, f := range result.Files { - receivedDocuments = append(receivedDocuments, zoekt.Document{ + receivedDocuments = append(receivedDocuments, index.Document{ Name: f.FileName, Content: f.Content, }) } - for _, docs := range [][]zoekt.Document{step.expectedDocuments, receivedDocuments} { + for _, docs := range [][]index.Document{step.expectedDocuments, receivedDocuments} { sort.Slice(docs, func(i, j int) bool { a, b := docs[i], docs[j] @@ -763,7 +985,7 @@ func TestIndexDeltaBasic(t *testing.T) { } compareOptions := []cmp.Option{ - cmpopts.IgnoreFields(zoekt.Document{}, "Branches"), + cmpopts.IgnoreFields(index.Document{}, "Branches"), cmpopts.EquateEmpty(), } @@ -776,64 +998,7 @@ func TestIndexDeltaBasic(t *testing.T) { } } -func TestRepoPathRanks(t *testing.T) { - pathRanks := repoPathRanks{ - Paths: map[string]float64{ - "search.go": 10.23, - "internal/index.go": 5.5, - "internal/scratch.go": 0.0, - "backend/search_test.go": 2.1, - }, - MeanRank: 3.3, - } - cases := []struct { - name string - path string - rank float64 - }{ - { - name: "rank for standard file", - path: 
"search.go", - rank: 10.23, - }, - { - name: "file with rank 0", - path: "internal/scratch.go", - rank: 0.0, - }, - { - name: "rank for test file", - path: "backend/search_test.go", - rank: 2.1, - }, - { - name: "file with missing rank", - path: "internal/docs.md", - rank: 3.3, - }, - { - name: "test file with missing rank", - path: "backend/index_test.go", - rank: 0.0, - }, - { - name: "third-party file with missing rank", - path: "node_modules/search/index.js", - rank: 0.0, - }, - } - - for _, tt := range cases { - t.Run(tt.name, func(t *testing.T) { - got := pathRanks.rank(tt.path, nil) - if got != tt.rank { - t.Errorf("expected file '%s' to have rank %f, but got %f", tt.path, tt.rank, got) - } - }) - } -} - -func runScript(t *testing.T, cwd string, script string) { +func runGit(t *testing.T, cwd string, args ...string) { t.Helper() err := os.MkdirAll(cwd, 0o755) @@ -841,9 +1006,16 @@ func runScript(t *testing.T, cwd string, script string) { t.Fatalf("ensuring path %q exists: %s", cwd, err) } - cmd := exec.Command("sh", "-euxc", script) + cmd := exec.Command("git", args...) cmd.Dir = cwd - cmd.Env = append([]string{"GIT_CONFIG_GLOBAL=", "GIT_CONFIG_SYSTEM="}, os.Environ()...) 
+ cmd.Env = append(os.Environ(), + "GIT_CONFIG_GLOBAL=", + "GIT_CONFIG_SYSTEM=", + "GIT_COMMITTER_NAME=Kierkegaard", + "GIT_COMMITTER_EMAIL=soren@apache.com", + "GIT_AUTHOR_NAME=Kierkegaard", + "GIT_AUTHOR_EMAIL=soren@apache.com", + ) if out, err := cmd.CombinedOutput(); err != nil { t.Fatalf("execution error: %v, output %s", err, out) @@ -851,11 +1023,13 @@ func runScript(t *testing.T, cwd string, script string) { } func TestSetTemplates_e2e(t *testing.T) { + t.Parallel() + repositoryDir := t.TempDir() // setup: initialize the repository and all of its branches - runScript(t, repositoryDir, "git init -b master") - runScript(t, repositoryDir, "git config remote.origin.url git@github.com:sourcegraph/zoekt.git") + runGit(t, repositoryDir, "init", "-b", "master") + runGit(t, repositoryDir, "config", "remote.origin.url", "git@github.com:sourcegraph/zoekt.git") desc := zoekt.Repository{} if err := setTemplatesFromConfig(&desc, repositoryDir); err != nil { t.Fatalf("setTemplatesFromConfig: %v", err) @@ -866,7 +1040,24 @@ func TestSetTemplates_e2e(t *testing.T) { } } +func TestSetTemplates_Worktree(t *testing.T) { + t.Parallel() + + _, worktreeDir := initGitWorktree(t, "hello.go", "package main\n") + desc := zoekt.Repository{} + + if err := setTemplatesFromConfig(&desc, worktreeDir); err != nil { + t.Fatalf("setTemplatesFromConfig(worktree): %v", err) + } + + if got, want := desc.FileURLTemplate, `{{URLJoinPath "https://github.com/sourcegraph/zoekt" "blob" .Version .Path}}`; got != want { + t.Errorf("got %q, want %q", got, want) + } +} + func TestSetTemplates(t *testing.T) { + t.Parallel() + base := "https://example.com/repo/name" version := "VERSION" path := "dir/name.txt" @@ -920,10 +1111,12 @@ func TestSetTemplates(t *testing.T) { for _, tc := range cases { t.Run(tc.typ, func(t *testing.T) { + t.Parallel() + assertOutput := func(templateText string, want string) { t.Helper() - tt, err := zoekt.ParseTemplate(templateText) + tt, err := index.ParseTemplate(templateText) 
if err != nil { t.Fatal(err) } @@ -956,9 +1149,8 @@ func TestSetTemplates(t *testing.T) { } func BenchmarkPrepareNormalBuild(b *testing.B) { - // NOTE: To run the benchmark, download a large repo (like github.com/chromium/chromium/) and change this to its path. - repoDir := "/path/to/your/repo" - repo, err := git.PlainOpen(repoDir) + repoDir := requireBenchGitRepo(b) + repo, err := plainOpenRepo(repoDir) if err != nil { b.Fatalf("Failed to open test repository: %v", err) } @@ -967,8 +1159,8 @@ func BenchmarkPrepareNormalBuild(b *testing.B) { RepoDir: repoDir, Submodules: false, BranchPrefix: "refs/heads/", - Branches: []string{"main"}, - BuildOptions: build.Options{ + Branches: []string{"HEAD"}, + BuildOptions: index.Options{ RepositoryDescription: zoekt.Repository{ Name: "test-repo", URL: "https://github.com/example/test-repo", diff --git a/gitindex/main_test.go b/gitindex/main_test.go new file mode 100644 index 000000000..35edc2a5b --- /dev/null +++ b/gitindex/main_test.go @@ -0,0 +1,19 @@ +package gitindex + +import ( + "flag" + "io" + "log" + "os" + "testing" +) + +func TestMain(m *testing.M) { + flag.Parse() + + if !testing.Verbose() { + log.SetOutput(io.Discard) + } + + os.Exit(m.Run()) +} diff --git a/gitindex/repocache_test.go b/gitindex/repocache_test.go index 169f2741e..60701c3a2 100644 --- a/gitindex/repocache_test.go +++ b/gitindex/repocache_test.go @@ -22,6 +22,8 @@ import ( ) func TestListReposNonExistent(t *testing.T) { + t.Parallel() + u, err := url.Parse("https://gerrit.googlesource.com/") if err != nil { t.Fatalf("url.Parse: %v", err) @@ -34,6 +36,8 @@ func TestListReposNonExistent(t *testing.T) { } func TestListRepos(t *testing.T) { + t.Parallel() + dir := t.TempDir() if err := createSubmoduleRepo(dir); err != nil { diff --git a/gitindex/slab.go b/gitindex/slab.go new file mode 100644 index 000000000..634b1a8cf --- /dev/null +++ b/gitindex/slab.go @@ -0,0 +1,32 @@ +package gitindex + +// contentSlab reduces per-file heap allocations by 
sub-slicing from a +// shared buffer. Each returned slice has its capacity capped (3-index +// slice) so appending to one file's content cannot overwrite adjacent +// data. Files larger than the slab get their own allocation. +type contentSlab struct { + buf []byte + cap int +} + +func newContentSlab(slabCap int) contentSlab { + return contentSlab{ + buf: make([]byte, 0, slabCap), + cap: slabCap, + } +} + +// alloc returns a byte slice of length n. The caller must write into it +// immediately (the bytes are uninitialized when sourced from the slab). +func (s *contentSlab) alloc(n int) []byte { + if n > s.cap { + return make([]byte, n) + } + if len(s.buf)+n > cap(s.buf) { + s.buf = make([]byte, n, s.cap) + return s.buf[:n:n] + } + off := len(s.buf) + s.buf = s.buf[:off+n] + return s.buf[off : off+n : off+n] +} diff --git a/gitindex/slab_test.go b/gitindex/slab_test.go new file mode 100644 index 000000000..39adbd3a7 --- /dev/null +++ b/gitindex/slab_test.go @@ -0,0 +1,72 @@ +package gitindex + +import "testing" + +func TestContentSlab(t *testing.T) { + t.Run("fits in slab", func(t *testing.T) { + s := newContentSlab(1024) + b := s.alloc(100) + if len(b) != 100 { + t.Fatalf("len = %d, want 100", len(b)) + } + if cap(b) != 100 { + t.Fatalf("cap = %d, want 100 (3-index slice)", cap(b)) + } + }) + + t.Run("cap is capped so append cannot corrupt adjacent data", func(t *testing.T) { + s := newContentSlab(1024) + a := s.alloc(10) + copy(a, []byte("aaaaaaaaaa")) + + b := s.alloc(10) + copy(b, []byte("bbbbbbbbbb")) + + // Appending to a must not overwrite b. + a = append(a, 'X') // triggers new backing array since cap==len + if string(b) != "bbbbbbbbbb" { + t.Fatalf("adjacent data corrupted: got %q", b) + } + _ = a + }) + + t.Run("slab rollover", func(t *testing.T) { + s := newContentSlab(64) + a := s.alloc(60) + if len(a) != 60 || cap(a) != 60 { + t.Fatalf("a: len=%d cap=%d", len(a), cap(a)) + } + // Next alloc doesn't fit in remaining 4 bytes → new slab. 
+ b := s.alloc(10) + if len(b) != 10 || cap(b) != 10 { + t.Fatalf("b: len=%d cap=%d", len(b), cap(b)) + } + // a and b should not share backing arrays. + copy(a, make([]byte, 60)) + copy(b, []byte("0123456789")) + if string(b) != "0123456789" { + t.Fatal("rollover corrupted data") + } + }) + + t.Run("oversized allocation", func(t *testing.T) { + s := newContentSlab(64) + b := s.alloc(128) + if len(b) != 128 { + t.Fatalf("len = %d, want 128", len(b)) + } + // Oversized alloc should not consume slab space. + c := s.alloc(32) + if len(c) != 32 || cap(c) != 32 { + t.Fatalf("c: len=%d cap=%d", len(c), cap(c)) + } + }) + + t.Run("zero size", func(t *testing.T) { + s := newContentSlab(64) + b := s.alloc(0) + if len(b) != 0 { + t.Fatalf("len = %d, want 0", len(b)) + } + }) +} diff --git a/gitindex/submodule_test.go b/gitindex/submodule_test.go index ae630831e..61dcab801 100644 --- a/gitindex/submodule_test.go +++ b/gitindex/submodule_test.go @@ -20,6 +20,8 @@ import ( ) func TestParseGitModules(t *testing.T) { + t.Parallel() + cases := []struct { data string want map[string]*SubmoduleEntry diff --git a/gitindex/tree.go b/gitindex/tree.go index 7b64eb801..b14f0e252 100644 --- a/gitindex/tree.go +++ b/gitindex/tree.go @@ -23,12 +23,12 @@ import ( "path/filepath" "strings" + "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/filemode" "github.com/go-git/go-git/v5/plumbing/object" - "github.com/sourcegraph/zoekt/ignore" - "github.com/go-git/go-git/v5" + "github.com/sourcegraph/zoekt/ignore" ) // RepoWalker walks one or more commit trees, collecting the files to index in its Files map. 
@@ -178,9 +178,16 @@ func (rw *RepoWalker) handleSubmodule(p string, id *plumbing.Hash, branch string } func (rw *RepoWalker) handleEntry(p string, e *object.TreeEntry, branch string, subRepoVersions map[string]plumbing.Hash, ig *ignore.Matcher) error { - if e.Mode == filemode.Submodule && rw.repoCache != nil { - if err := rw.tryHandleSubmodule(p, &e.Hash, branch, subRepoVersions, ig); err != nil { - return fmt.Errorf("submodule %s: %v", p, err) + if e.Mode == filemode.Submodule { + if rw.repoCache != nil { + // Index the submodule using repo cache + if err := rw.tryHandleSubmodule(p, &e.Hash, branch, subRepoVersions, ig); err != nil { + return fmt.Errorf("submodule %s: %v", p, err) + } + } else { + // Record the commit ID for the submodule path + // This will be the submodule's commit hash, not the parent's + subRepoVersions[p] = e.Hash } } diff --git a/gitindex/tree_test.go b/gitindex/tree_test.go index ba4518a29..5057fd2c4 100644 --- a/gitindex/tree_test.go +++ b/gitindex/tree_test.go @@ -29,12 +29,12 @@ import ( "github.com/google/go-cmp/cmp" "github.com/grafana/regexp" - "github.com/sourcegraph/zoekt/ignore" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/ignore" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/query" - "github.com/sourcegraph/zoekt/shards" + "github.com/sourcegraph/zoekt/search" ) func createSubmoduleRepo(dir string) error { @@ -106,6 +106,8 @@ EOF } func TestFindGitRepos(t *testing.T) { + t.Parallel() + dir := t.TempDir() if err := createSubmoduleRepo(dir); err != nil { @@ -141,6 +143,8 @@ func TestFindGitRepos(t *testing.T) { } func TestCollectFiles(t *testing.T) { + t.Parallel() + dir := t.TempDir() if err := createSubmoduleRepo(dir); err != nil { @@ -195,6 +199,8 @@ func TestCollectFiles(t *testing.T) { } func TestSubmoduleIndex(t *testing.T) { + t.Parallel() + dir := t.TempDir() if err := createSubmoduleRepo(dir); err != nil { @@ -203,7 +209,7 @@ func 
TestSubmoduleIndex(t *testing.T) { indexDir := t.TempDir() - buildOpts := build.Options{ + buildOpts := index.Options{ IndexDir: indexDir, } opts := Options{ @@ -219,7 +225,7 @@ func TestSubmoduleIndex(t *testing.T) { t.Fatalf("IndexGitRepo: %v", err) } - searcher, err := shards.NewDirectorySearcher(indexDir) + searcher, err := search.NewDirectorySearcher(indexDir) if err != nil { t.Fatal("NewDirectorySearcher", err) } @@ -258,6 +264,72 @@ func TestSubmoduleIndex(t *testing.T) { } } +func TestSubmoduleIndexWithoutRepocache(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + + if err := createSubmoduleRepo(dir); err != nil { + t.Fatalf("createSubmoduleRepo: %v", err) + } + + indexDir := t.TempDir() + + buildOpts := index.Options{ + RepositoryDescription: zoekt.Repository{Name: "adir"}, + IndexDir: indexDir, + } + opts := Options{ + RepoDir: filepath.Join(dir, "adir"), + BuildOptions: buildOpts, + BranchPrefix: "refs/heads/", + Branches: []string{"master"}, + Submodules: true, + Incremental: true, + } + if _, err := IndexGitRepo(opts); err != nil { + t.Fatalf("IndexGitRepo: %v", err) + } + + searcher, err := search.NewDirectorySearcher(indexDir) + if err != nil { + t.Fatal("NewDirectorySearcher", err) + } + defer searcher.Close() + + results, err := searcher.Search(context.Background(), + &query.Substring{Pattern: "bcont"}, + &zoekt.SearchOptions{}) + if err != nil { + t.Fatal("Search", err) + } + + if len(results.Files) != 1 { + t.Fatalf("got search result %v, want 1 file", results.Files) + } + + file := results.Files[0] + if got, want := file.SubRepositoryName, "bname"; got != want { + t.Errorf("got subrepo name %q, want %q", got, want) + } + if got, want := file.SubRepositoryPath, "bname"; got != want { + t.Errorf("got subrepo path %q, want %q", got, want) + } + + subVersion := file.Version + if len(subVersion) != 40 { + t.Fatalf("got %q, want hex sha1", subVersion) + } + + if results, err := searcher.Search(context.Background(), &query.Substring{Pattern: 
"acont"}, &zoekt.SearchOptions{}); err != nil { + t.Fatalf("Search('acont'): %v", err) + } else if len(results.Files) != 1 { + t.Errorf("got %v, want 1 result", results.Files) + } else if f := results.Files[0]; f.Version == subVersion { + t.Errorf("version in super repo matched version is subrepo.") + } +} + func createSymlinkRepo(dir string) error { if err := os.MkdirAll(dir, 0o755); err != nil { return err @@ -299,6 +371,8 @@ EOF } func TestSearchSymlinkByContent(t *testing.T) { + t.Parallel() + dir := t.TempDir() if err := createSymlinkRepo(dir); err != nil { @@ -307,7 +381,7 @@ func TestSearchSymlinkByContent(t *testing.T) { indexDir := t.TempDir() - buildOpts := build.Options{ + buildOpts := index.Options{ IndexDir: indexDir, } opts := Options{ @@ -323,7 +397,7 @@ func TestSearchSymlinkByContent(t *testing.T) { t.Fatalf("IndexGitRepo: %v", err) } - searcher, err := shards.NewDirectorySearcher(indexDir) + searcher, err := search.NewDirectorySearcher(indexDir) if err != nil { t.Fatal("NewDirectorySearcher", err) } @@ -356,6 +430,8 @@ func TestSearchSymlinkByContent(t *testing.T) { } func TestAllowMissingBranch(t *testing.T) { + t.Parallel() + dir := t.TempDir() if err := createSubmoduleRepo(dir); err != nil { @@ -364,7 +440,7 @@ func TestAllowMissingBranch(t *testing.T) { indexDir := t.TempDir() - buildOpts := build.Options{ + buildOpts := index.Options{ IndexDir: indexDir, } @@ -422,6 +498,8 @@ git update-ref refs/meta/config HEAD } func TestBranchWildcard(t *testing.T) { + t.Parallel() + dir := t.TempDir() if err := createMultibranchRepo(dir); err != nil { @@ -430,7 +508,7 @@ func TestBranchWildcard(t *testing.T) { indexDir := t.TempDir() - buildOpts := build.Options{ + buildOpts := index.Options{ IndexDir: indexDir, RepositoryDescription: zoekt.Repository{ Name: "repo", @@ -450,7 +528,7 @@ func TestBranchWildcard(t *testing.T) { t.Fatalf("IndexGitRepo: %v", err) } - searcher, err := shards.NewDirectorySearcher(indexDir) + searcher, err := 
search.NewDirectorySearcher(indexDir) if err != nil { t.Fatal("NewDirectorySearcher", err) } @@ -468,6 +546,8 @@ func TestBranchWildcard(t *testing.T) { } func TestSkipSubmodules(t *testing.T) { + t.Parallel() + dir := t.TempDir() if err := createSubmoduleRepo(dir); err != nil { @@ -476,7 +556,7 @@ func TestSkipSubmodules(t *testing.T) { indexDir := t.TempDir() - buildOpts := build.Options{ + buildOpts := index.Options{ IndexDir: indexDir, RepositoryDescription: zoekt.Repository{ Name: "gerrit.googlesource.com/adir", @@ -500,6 +580,8 @@ func TestSkipSubmodules(t *testing.T) { } func TestFullAndShortRefNames(t *testing.T) { + t.Parallel() + dir := t.TempDir() if err := createMultibranchRepo(dir); err != nil { @@ -508,7 +590,7 @@ func TestFullAndShortRefNames(t *testing.T) { indexDir := t.TempDir() - buildOpts := build.Options{ + buildOpts := index.Options{ IndexDir: indexDir, RepositoryDescription: zoekt.Repository{ Name: "repo", @@ -529,7 +611,7 @@ func TestFullAndShortRefNames(t *testing.T) { t.Fatalf("IndexGitRepo: %v", err) } - searcher, err := shards.NewDirectorySearcher(indexDir) + searcher, err := search.NewDirectorySearcher(indexDir) if err != nil { t.Fatal("NewDirectorySearcher", err) } @@ -545,6 +627,8 @@ func TestFullAndShortRefNames(t *testing.T) { } func TestUniq(t *testing.T) { + t.Parallel() + in := []string{"a", "b", "b", "c", "c"} want := []string{"a", "b", "c"} got := uniq(in) @@ -554,6 +638,8 @@ func TestUniq(t *testing.T) { } func TestLatestCommit(t *testing.T) { + t.Parallel() + dir := t.TempDir() indexDir := t.TempDir() @@ -561,7 +647,7 @@ func TestLatestCommit(t *testing.T) { t.Fatalf("createMultibranchRepo: %v", err) } - buildOpts := build.Options{ + buildOpts := index.Options{ IndexDir: indexDir, RepositoryDescription: zoekt.Repository{ Name: "repo", @@ -579,7 +665,7 @@ func TestLatestCommit(t *testing.T) { t.Fatalf("IndexGitRepo: %v", err) } - searcher, err := shards.NewDirectorySearcher(indexDir) + searcher, err := 
search.NewDirectorySearcher(indexDir) if err != nil { t.Fatal("NewDirectorySearcher", err) } diff --git a/go.mod b/go.mod index 6a22aceed..9b5d676e0 100644 --- a/go.mod +++ b/go.mod @@ -1,91 +1,97 @@ module github.com/sourcegraph/zoekt require ( - cloud.google.com/go/profiler v0.4.1 + cloud.google.com/go/profiler v0.4.2 github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 github.com/RoaringBitmap/roaring v1.9.4 - github.com/andygrunwald/go-gerrit v0.0.0-20240524171439-0983e87949db + github.com/andygrunwald/go-gerrit v1.0.0 github.com/bmatcuk/doublestar v1.3.4 github.com/dustin/go-humanize v1.0.1 github.com/felixge/fgprof v0.9.5 - github.com/fsnotify/fsnotify v1.7.0 - github.com/gfleury/go-bitbucket-v1 v0.0.0-20240822132758-a3031aa024b4 - github.com/go-enry/go-enry/v2 v2.8.9 - github.com/go-git/go-git/v5 v5.17.2 + github.com/fsnotify/fsnotify v1.8.0 + github.com/gfleury/go-bitbucket-v1 v0.0.0-20240917142304-df385efaac68 + github.com/go-enry/go-enry/v2 v2.9.1 + github.com/go-git/go-git/v5 v5.17.0 github.com/gobwas/glob v0.2.3 github.com/google/go-cmp v0.7.0 - github.com/google/go-github/v27 v27.0.6 + github.com/google/go-github/v78 v78.0.0 github.com/google/slothfs v0.0.0-20190717100203-59c1163fd173 github.com/grafana/regexp v0.0.0-20240607082908-2cb410fa05da github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 - github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0 + github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.2.0 github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f github.com/opentracing/opentracing-go v1.2.0 github.com/peterbourgon/ff/v3 v3.4.0 github.com/pkg/errors v0.9.1 - github.com/prometheus/client_golang v1.20.3 + github.com/prometheus/client_golang v1.20.5 github.com/prometheus/procfs v0.15.1 github.com/rs/xid v1.6.0 github.com/shirou/gopsutil/v3 v3.24.5 - github.com/sourcegraph/go-ctags v0.0.0-20240424152308-4faeee4849da - github.com/sourcegraph/log 
v0.0.0-20240515122000-be5f7eea69b1 - github.com/sourcegraph/mountinfo v0.0.0-20240201124957-b314c0befab1 + github.com/sourcegraph/go-ctags v0.0.0-20250729094530-349a251d78d8 + github.com/sourcegraph/log v0.0.0-20241024013702-574f7079c888 + github.com/sourcegraph/mountinfo v0.0.0-20251117172727-e9f6a87f579c github.com/stretchr/testify v1.11.1 github.com/uber/jaeger-client-go v2.30.0+incompatible github.com/uber/jaeger-lib v2.4.1+incompatible - github.com/xanzy/go-gitlab v0.109.0 + github.com/wasilibs/go-re2 v1.10.0 github.com/xeipuuv/gojsonschema v1.2.0 - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.68.0 - go.opentelemetry.io/contrib/propagators/jaeger v1.43.0 - go.opentelemetry.io/contrib/propagators/ot v1.43.0 - go.opentelemetry.io/otel v1.43.0 - go.opentelemetry.io/otel/bridge/opentracing v1.43.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 - go.opentelemetry.io/otel/sdk v1.43.0 - go.opentelemetry.io/otel/trace v1.43.0 + gitlab.com/gitlab-org/api/client-go v1.46.0 + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 + go.opentelemetry.io/contrib/propagators/jaeger v1.33.0 + go.opentelemetry.io/contrib/propagators/ot v1.33.0 + go.opentelemetry.io/otel v1.42.0 + go.opentelemetry.io/otel/bridge/opentracing v1.33.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.33.0 + go.opentelemetry.io/otel/sdk v1.42.0 + go.opentelemetry.io/otel/trace v1.42.0 go.uber.org/atomic v1.11.0 - go.uber.org/automaxprocs v1.5.3 - golang.org/x/net v0.52.0 - golang.org/x/oauth2 v0.35.0 - golang.org/x/sync v0.20.0 - golang.org/x/sys v0.42.0 - google.golang.org/grpc v1.80.0 + go.uber.org/automaxprocs v1.6.0 + 
golang.org/x/net v0.47.0 + golang.org/x/oauth2 v0.34.0 + golang.org/x/sync v0.19.0 + golang.org/x/sys v0.41.0 + google.golang.org/grpc v1.75.0 google.golang.org/protobuf v1.36.11 + pgregory.net/rapid v1.2.0 ) require ( - github.com/cenkalti/backoff/v5 v5.0.3 // indirect + github.com/42wim/httpsig v1.2.2 // indirect github.com/davidmz/go-pageant v1.0.2 // indirect github.com/go-fed/httpsig v1.1.0 // indirect - github.com/hashicorp/go-version v1.6.0 // indirect + github.com/hashicorp/go-version v1.7.0 // indirect + github.com/kylelemons/godebug v1.1.0 // indirect + github.com/tetratelabs/wazero v1.9.0 // indirect + github.com/wasilibs/wazero-helpers v0.0.0-20240620070341-3dff1577cd52 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect ) require ( - cloud.google.com/go v0.115.1 // indirect - cloud.google.com/go/auth v0.9.3 // indirect - cloud.google.com/go/auth/oauth2adapt v0.2.4 // indirect - cloud.google.com/go/compute/metadata v0.9.0 // indirect - code.gitea.io/sdk/gitea v0.19.0 + cloud.google.com/go v0.118.0 // indirect + cloud.google.com/go/auth v0.14.0 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.7 // indirect + cloud.google.com/go/compute/metadata v0.7.0 // indirect + code.gitea.io/sdk/gitea v0.20.0 dario.cat/mergo v1.0.1 // indirect github.com/HdrHistogram/hdrhistogram-go v1.1.2 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/ProtonMail/go-crypto v1.1.6 // indirect github.com/beorn7/perks v1.0.1 // indirect - github.com/bits-and-blooms/bitset v1.14.3 // indirect - github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/bits-and-blooms/bitset v1.20.0 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 github.com/cloudflare/circl v1.6.3 // indirect github.com/cockroachdb/errors v1.11.3 // indirect - github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b // indirect + github.com/cockroachdb/logtags v0.0.0-20241215232642-bb51bb14a506 // indirect 
github.com/cockroachdb/redact v1.1.5 // indirect github.com/cyphar/filepath-securejoin v0.4.1 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emirpasic/gods v1.18.1 // indirect - github.com/fatih/color v1.17.0 // indirect - github.com/getsentry/sentry-go v0.28.1 // indirect + github.com/fatih/color v1.18.0 // indirect + github.com/getsentry/sentry-go v0.31.1 // indirect github.com/go-enry/go-oniguruma v1.2.1 // indirect github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect github.com/go-git/go-billy/v5 v5.8.0 @@ -94,31 +100,31 @@ require ( github.com/go-ole/go-ole v1.3.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect - github.com/google/go-querystring v1.1.0 // indirect - github.com/google/pprof v0.0.0-20240903155634-a8630aee4ab9 // indirect - github.com/google/s2a-go v0.1.8 // indirect + github.com/google/go-querystring v1.2.0 // indirect + github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect + github.com/google/s2a-go v0.1.9 // indirect github.com/google/uuid v1.6.0 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect - github.com/googleapis/gax-go/v2 v2.13.0 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect + github.com/googleapis/gax-go/v2 v2.14.1 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1 // indirect github.com/hashicorp/go-cleanhttp v0.5.2 // indirect - github.com/hashicorp/go-retryablehttp v0.7.7 // indirect + github.com/hashicorp/go-retryablehttp v0.7.8 // indirect github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect github.com/kevinburke/ssh_config v1.2.0 // indirect - github.com/klauspost/compress v1.17.9 // indirect + github.com/klauspost/compress v1.17.11 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // 
indirect - github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-colorable v0.1.14 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/moby/sys/mountinfo v0.7.2 // indirect github.com/mschoch/smat v0.2.0 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pjbgf/sha1cd v0.3.2 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect github.com/prometheus/client_model v0.6.1 // indirect - github.com/prometheus/common v0.59.1 // indirect + github.com/prometheus/common v0.62.0 // indirect github.com/rogpeppe/go-internal v1.14.1 // indirect github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect github.com/skeema/knownhosts v1.3.1 // indirect @@ -126,18 +132,17 @@ require ( github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect - go.opencensus.io v0.24.0 // indirect - go.opentelemetry.io/otel/metric v1.43.0 // indirect - go.opentelemetry.io/proto/otlp v1.10.0 // indirect - go.uber.org/multierr v1.11.0 // indirect + go.opentelemetry.io/otel/metric v1.42.0 // indirect + go.opentelemetry.io/proto/otlp v1.5.0 // indirect + go.uber.org/multierr v1.11.0 go.uber.org/zap v1.27.0 // indirect - golang.org/x/crypto v0.49.0 // indirect - golang.org/x/text v0.35.0 // indirect - golang.org/x/time v0.6.0 // indirect - google.golang.org/api v0.196.0 // indirect - google.golang.org/genproto v0.0.0-20240903143218-8af14fe29dc1 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20260406210006-6f92a3bedf2d // indirect + 
golang.org/x/crypto v0.45.0 // indirect + golang.org/x/text v0.32.0 // indirect + golang.org/x/time v0.14.0 // indirect + google.golang.org/api v0.217.0 // indirect + google.golang.org/genproto v0.0.0-20250115164207-1a7da9e5054f // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250811230008-5f3141c8851a // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index a1e0f8cb8..4ccb07313 100644 --- a/go.sum +++ b/go.sum @@ -1,24 +1,26 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.115.1 h1:Jo0SM9cQnSkYfp44+v+NQXHpcHqlnRJk2qxh6yvxxxQ= -cloud.google.com/go v0.115.1/go.mod h1:DuujITeaufu3gL68/lOFIirVNJwQeyf5UXyi+Wbgknc= -cloud.google.com/go/auth v0.9.3 h1:VOEUIAADkkLtyfr3BLa3R8Ed/j6w1jTBmARx+wb5w5U= -cloud.google.com/go/auth v0.9.3/go.mod h1:7z6VY+7h3KUdRov5F1i8NDP5ZzWKYmEPO842BgCsmTk= -cloud.google.com/go/auth/oauth2adapt v0.2.4 h1:0GWE/FUsXhf6C+jAkWgYm7X9tK8cuEIfy19DBn6B6bY= -cloud.google.com/go/auth/oauth2adapt v0.2.4/go.mod h1:jC/jOpwFP6JBxhB3P5Rr0a9HLMC/Pe3eaL4NmdvqPtc= -cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= -cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10= -cloud.google.com/go/iam v1.2.0 h1:kZKMKVNk/IsSSc/udOb83K0hL/Yh/Gcqpz+oAkoIFN8= -cloud.google.com/go/iam v1.2.0/go.mod h1:zITGuWgsLZxd8OwAlX+eMFgZDXzBm7icj1PVTYG766Q= -cloud.google.com/go/profiler v0.4.1 h1:Q7+lOvikTGMJ/IAWocpYYGit4SIIoILmVZfEEWTORSY= -cloud.google.com/go/profiler v0.4.1/go.mod h1:LBrtEX6nbvhv1w/e5CPZmX9ajGG9BGLtGbv56Tg4SHs= +cloud.google.com/go v0.118.0 h1:tvZe1mgqRxpiVa3XlIGMiPcEUbP1gNXELgD4y/IXmeQ= +cloud.google.com/go v0.118.0/go.mod h1:zIt2pkedt/mo+DQjcT4/L3NDxzHPR29j5HcclNH+9PM= 
+cloud.google.com/go/auth v0.14.0 h1:A5C4dKV/Spdvxcl0ggWwWEzzP7AZMJSEIgrkngwhGYM= +cloud.google.com/go/auth v0.14.0/go.mod h1:CYsoRL1PdiDuqeQpZE0bP2pnPrGqFcOkI0nldEQis+A= +cloud.google.com/go/auth/oauth2adapt v0.2.7 h1:/Lc7xODdqcEw8IrZ9SvwnlLX6j9FHQM74z6cBk9Rw6M= +cloud.google.com/go/auth/oauth2adapt v0.2.7/go.mod h1:NTbTTzfvPl1Y3V1nPpOgl2w6d/FjO7NNUQaWSox6ZMc= +cloud.google.com/go/compute/metadata v0.7.0 h1:PBWF+iiAerVNe8UCHxdOt6eHLVc3ydFeOCw78U8ytSU= +cloud.google.com/go/compute/metadata v0.7.0/go.mod h1:j5MvL9PprKL39t166CoB1uVHfQMs4tFQZZcKwksXUjo= +cloud.google.com/go/iam v1.3.1 h1:KFf8SaT71yYq+sQtRISn90Gyhyf4X8RGgeAVC8XGf3E= +cloud.google.com/go/iam v1.3.1/go.mod h1:3wMtuyT4NcbnYNPLMBzYRFiEfjKfJlLVLrisE7bwm34= +cloud.google.com/go/profiler v0.4.2 h1:KojCmZ+bEPIQrd7bo2UFvZ2xUPLHl55KzHl7iaR4V2I= +cloud.google.com/go/profiler v0.4.2/go.mod h1:7GcWzs9deJHHdJ5J9V1DzKQ9JoIoTGhezwlLbwkOoCs= cloud.google.com/go/storage v1.43.0 h1:CcxnSohZwizt4LCzQHWvBf1/kvtHUn7gk9QERXPyXFs= cloud.google.com/go/storage v1.43.0/go.mod h1:ajvxEa7WmZS1PxvKRq4bq0tFT3vMd502JwstCcYv0Q0= -code.gitea.io/sdk/gitea v0.19.0 h1:8I6s1s4RHgzxiPHhOQdgim1RWIRcr0LVMbHBjBFXq4Y= -code.gitea.io/sdk/gitea v0.19.0/go.mod h1:IG9xZJoltDNeDSW0qiF2Vqx5orMWa7OhVWrjvrd5NpI= +code.gitea.io/sdk/gitea v0.20.0 h1:Zm/QDwwZK1awoM4AxdjeAQbxolzx2rIP8dDfmKu+KoU= +code.gitea.io/sdk/gitea v0.20.0/go.mod h1:faouBHC/zyx5wLgjmRKR62ydyvMzwWf3QnU0bH7Cw6U= dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s= dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= +github.com/42wim/httpsig v1.2.2 h1:ofAYoHUNs/MJOLqQ8hIxeyz2QxOz8qdSVvp3PX/oPgA= +github.com/42wim/httpsig v1.2.2/go.mod h1:P/UYo7ytNBFwc+dg35IubuAUIs8zj5zzFIgUCEl55WY= github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU= 
github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= @@ -33,8 +35,8 @@ github.com/ProtonMail/go-crypto v1.1.6/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQA github.com/RoaringBitmap/roaring v1.9.4 h1:yhEIoH4YezLYT04s1nHehNO64EKFTop/wBhxv2QzDdQ= github.com/RoaringBitmap/roaring v1.9.4/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90= github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= -github.com/andygrunwald/go-gerrit v0.0.0-20240524171439-0983e87949db h1:2YMDOckptG6kXomKVoTUfjOldHjUIl1r643sIikBjA4= -github.com/andygrunwald/go-gerrit v0.0.0-20240524171439-0983e87949db/go.mod h1:SeP12EkHZxEVjuJ2HZET304NBtHGG2X6w2Gzd0QXAZw= +github.com/andygrunwald/go-gerrit v1.0.0 h1:TrRGbso70QjJcXPC4kkLiKQrAfCBoBV+cBs7NrJxeno= +github.com/andygrunwald/go-gerrit v1.0.0/go.mod h1:SeP12EkHZxEVjuJ2HZET304NBtHGG2X6w2Gzd0QXAZw= github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8= github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= @@ -43,12 +45,12 @@ github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZx github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= -github.com/bits-and-blooms/bitset v1.14.3 h1:Gd2c8lSNf9pKXom5JtD7AaKO8o7fGQ2LtFj1436qilA= -github.com/bits-and-blooms/bitset v1.14.3/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/bits-and-blooms/bitset v1.20.0 
h1:2F+rfL86jE2d/bmw7OhqUg2Sj/1rURkBn3MdfoPyRVU= +github.com/bits-and-blooms/bitset v1.20.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= github.com/bmatcuk/doublestar v1.3.4 h1:gPypJ5xD31uhX6Tf54sDPUOBXTqKH4c9aPY66CyQrS0= github.com/bmatcuk/doublestar v1.3.4/go.mod h1:wiQtGV+rzVYxB7WIlirSN++5HPtPlXEo9MEoZQC/PmE= -github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= -github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= @@ -62,20 +64,19 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/cloudflare/circl v1.6.3 h1:9GPOhQGF9MCYUeXyMYlqTR6a5gTrgR/fBLXvUgtVcg8= github.com/cloudflare/circl v1.6.3/go.mod h1:2eXP6Qfat4O/Yhh8BznvKnJ+uzEoTQ6jVKJRn81BiS4= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 h1:6xNmx7iTtyBRev0+D/Tv1FZd4SCg8axKApyNyRsAt/w= -github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5/go.mod h1:KdCmV+x/BuvyMxRnYBlmVaq4OLiKW6iRQfvC62cvdkI= github.com/cockroachdb/errors v1.11.3 h1:5bA+k2Y6r+oz/6Z/RFlNeVCesGARKuC6YymtcDrbC/I= github.com/cockroachdb/errors v1.11.3/go.mod h1:m4UIW4CDjx+R5cybPsNrRbreomiFqt8o1h1wUVazSd8= -github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b h1:r6VH0faHjZeQy818SGhaone5OnYfxFR/+AzdY3sf5aE= -github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs= 
+github.com/cockroachdb/logtags v0.0.0-20241215232642-bb51bb14a506 h1:ASDL+UJcILMqgNeV5jiqR4j+sTuvQNHdf2chuKj1M5k= +github.com/cockroachdb/logtags v0.0.0-20241215232642-bb51bb14a506/go.mod h1:Mw7HqKr2kdtu6aYGn3tPmAftiP3QPX63LdK/zcariIo= github.com/cockroachdb/redact v1.1.5 h1:u1PMllDkdFfPWaNGMyLD1+so+aq3uUItthCFqzwPJ30= github.com/cockroachdb/redact v1.1.5/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s= github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davidmz/go-pageant v1.0.2 h1:bPblRCh5jGU+Uptpz6LgMZGD5hJoOt7otgT454WvHn0= github.com/davidmz/go-pageant v1.0.2/go.mod h1:P2EDDnMqIwG5Rrp05dTRITj9z2zpGcD9efWSkTNKLIE= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= @@ -87,33 +88,28 @@ github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FM github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= -github.com/envoyproxy/go-control-plane v0.14.0 h1:hbG2kr4RuFj222B6+7T83thSPqLjwBIfQawTkC++2HA= 
-github.com/envoyproxy/go-control-plane/envoy v1.36.0 h1:yg/JjO5E7ubRyKX3m07GF3reDNEnfOboJ0QySbH736g= -github.com/envoyproxy/go-control-plane/envoy v1.36.0/go.mod h1:ty89S1YCCVruQAm9OtKeEkQLTb+Lkz0k8v9W0Oxsv98= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/envoyproxy/protoc-gen-validate v1.3.0 h1:TvGH1wof4H33rezVKWSpqKz5NXWg5VPuZ0uONDT6eb4= -github.com/envoyproxy/protoc-gen-validate v1.3.0/go.mod h1:HvYl7zwPa5mffgyeTUHA9zHIH36nmrm7oCbo4YKoSWA= github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= -github.com/fatih/color v1.17.0 h1:GlRw1BRJxkpqUCBKzKOw098ed57fEsKeNjpTe3cSjK4= -github.com/fatih/color v1.17.0/go.mod h1:YZ7TlrGPkiz6ku9fK3TLD/pl3CpsiFyu8N92HLgmosI= +github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= +github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= github.com/felixge/fgprof v0.9.5 h1:8+vR6yu2vvSKn08urWyEuxx75NWPEvybbkBirEpsbVY= github.com/felixge/fgprof v0.9.5/go.mod h1:yKl+ERSa++RYOs32d8K6WEXCB4uXdLls4ZaZPpayhMM= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/frankban/quicktest v1.14.3/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps= -github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= -github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= -github.com/getsentry/sentry-go v0.28.1 h1:zzaSm/vHmGllRM6Tpx1492r0YDzauArdBfkJRtY6P5k= -github.com/getsentry/sentry-go v0.28.1/go.mod h1:1fQZ+7l7eeJ3wYi82q5Hg8GqAPgefRq+FP/QhafYVgg= -github.com/gfleury/go-bitbucket-v1 v0.0.0-20240822132758-a3031aa024b4 h1:sRZ/hpZOEfU+/05QFOt6nRE+OAFWCP0x+qz+inrcbkw= 
-github.com/gfleury/go-bitbucket-v1 v0.0.0-20240822132758-a3031aa024b4/go.mod h1:bB7XwdZF40tLVnu9n5A9TjI2ddNZtLYImtwYwmcmnRo= +github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M= +github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/getsentry/sentry-go v0.31.1 h1:ELVc0h7gwyhnXHDouXkhqTFSO5oslsRDk0++eyE0KJ4= +github.com/getsentry/sentry-go v0.31.1/go.mod h1:CYNcMMz73YigoHljQRG+qPF+eMq8gG72XcGN/p71BAY= +github.com/gfleury/go-bitbucket-v1 v0.0.0-20240917142304-df385efaac68 h1:iJXWkoIPk3e8RVHhQE/gXfP2TP3OLQ9vVPNSJ+oL6mM= +github.com/gfleury/go-bitbucket-v1 v0.0.0-20240917142304-df385efaac68/go.mod h1:bB7XwdZF40tLVnu9n5A9TjI2ddNZtLYImtwYwmcmnRo= github.com/gfleury/go-bitbucket-v1/test/bb-mock-server v0.0.0-20230825095122-9bc1711434ab h1:BeG9dDWckFi/p5Gvqq3wTEDXsUV4G6bdvjEHMOT2B8E= github.com/gfleury/go-bitbucket-v1/test/bb-mock-server v0.0.0-20230825095122-9bc1711434ab/go.mod h1:VssB0kb1cETNaFFC/0mHVCj+7i5TS2xraYq+tl9JLwE= github.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c= github.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU= -github.com/go-enry/go-enry/v2 v2.8.9 h1:vskZIABoxInDd5sHY49t+C/VgF8RWxRdRMoH5AdLqQU= -github.com/go-enry/go-enry/v2 v2.8.9/go.mod h1:9yrj4ES1YrbNb1Wb7/PWYr2bpaCXUGRt0uafN0ISyG8= +github.com/go-enry/go-enry/v2 v2.9.1 h1:G9iDteJ/Mc0F4Di5NeQknf83R2OkRbwY9cAYmcqVG6U= +github.com/go-enry/go-enry/v2 v2.9.1/go.mod h1:9yrj4ES1YrbNb1Wb7/PWYr2bpaCXUGRt0uafN0ISyG8= github.com/go-enry/go-oniguruma v1.2.1 h1:k8aAMuJfMrqm/56SG2lV9Cfti6tC4x8673aHCcBk+eo= github.com/go-enry/go-oniguruma v1.2.1/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4= github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= @@ -126,8 +122,8 @@ github.com/go-git/go-billy/v5 v5.8.0 h1:I8hjc3LbBlXTtVuFNJuwYuMiHvQJDq1AT6u4DwDz github.com/go-git/go-billy/v5 v5.8.0/go.mod h1:RpvI/rw4Vr5QA+Z60c6d6LXH0rYJo0uD5SqfmrrheCY= 
github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399 h1:eMje31YglSBqCdIqdhKBW8lokaMrL3uTkpGYlE2OOT4= github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399/go.mod h1:1OCfN199q1Jm3HZlxleg+Dw/mwps2Wbk9frAWm+4FII= -github.com/go-git/go-git/v5 v5.17.2 h1:B+nkdlxdYrvyFK4GPXVU8w1U+YkbsgciIR7f2sZJ104= -github.com/go-git/go-git/v5 v5.17.2/go.mod h1:pW/VmeqkanRFqR6AljLcs7EA7FbZaN5MQqO7oZADXpo= +github.com/go-git/go-git/v5 v5.17.0 h1:AbyI4xf+7DsjINHMu35quAh4wJygKBKBuXVjV/pxesM= +github.com/go-git/go-git/v5 v5.17.0/go.mod h1:f82C4YiLx+Lhi8eHxltLeGC5uBTXSFa6PC5WW9o4SjI= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= @@ -149,7 +145,6 @@ github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ= github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= @@ -158,67 +153,57 @@ github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+Licev github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod 
h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= -github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= -github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= -github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= -github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= -github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= -github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= -github.com/google/go-github/v27 v27.0.6 h1:oiOZuBmGHvrGM1X9uNUAUlLgp5r1UUO/M/KnbHnLRlQ= -github.com/google/go-github/v27 v27.0.6/go.mod h1:/0Gr8pJ55COkmv+S/yPKCczSkUPIM/LnFyubufRNIS0= -github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= -github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= +github.com/google/go-github/v78 v78.0.0 h1:b1tytzFE8i//lRVDx5Qh/EdJbtTPtSVD3nF7hraEs9w= +github.com/google/go-github/v78 v78.0.0/go.mod h1:Uxvdzy82AkNlC6JQ57se9TqvmgBT7RF0ouHDNg2jd6g= github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= +github.com/google/go-querystring v1.2.0 h1:yhqkPbu2/OH+V9BfpCVPZkNmUXhb2gBxJArfhIxNtP0= +github.com/google/go-querystring v1.2.0/go.mod h1:8IFJqpSRITyJ8QhQ13bmbeMBDfmeEJZD5A0egEOmkqU= github.com/google/pprof v0.0.0-20240227163752-401108e1b7e7/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik= -github.com/google/pprof v0.0.0-20240903155634-a8630aee4ab9 h1:q5g0N9eal4bmJwXHC5z0QCKs8qhS35hFfq0BAYsIwZI= -github.com/google/pprof v0.0.0-20240903155634-a8630aee4ab9/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= -github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM= -github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA= +github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad h1:a6HEuzUHeKH6hwfN/ZoQgRgVIWFJljSWa/zetS2WTvg= +github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= +github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= 
github.com/google/slothfs v0.0.0-20190717100203-59c1163fd173 h1:iuModVoTuW2lBUobX9QBgqD+ipHbWKug6n8qkJfDtUE= github.com/google/slothfs v0.0.0-20190717100203-59c1163fd173/go.mod h1:kzvK/MFjZSNdFgc1tCZML3E1nVvnB4/npSKEuvMoECU= -github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw= github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA= -github.com/googleapis/gax-go/v2 v2.13.0 h1:yitjD5f7jQHhyDsnhKEBU52NdvvdSeGzlAnDPT0hH1s= -github.com/googleapis/gax-go/v2 v2.13.0/go.mod h1:Z/fvTZXF8/uw7Xu5GuslPw+bplx6SS338j1Is2S+B7A= +github.com/googleapis/gax-go/v2 v2.14.1 h1:hb0FFeiPaQskmvakKu5EbCbpntQn48jyHuvrkurSS/Q= +github.com/googleapis/gax-go/v2 v2.14.1/go.mod h1:Hb/NubMaVM88SrNkvl8X/o8XWwDJEPqouaLeN2IUxoA= github.com/gorilla/mux v1.7.4 h1:VuZ8uybHlWmqV03+zRzdwKL4tUnIp1MAQtp1mIFE1bc= github.com/gorilla/mux v1.7.4/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= github.com/grafana/regexp v0.0.0-20240607082908-2cb410fa05da h1:BML5sNe+bw2uO8t8cQSwe5QhvoP04eHPF7bnaQma0Kw= github.com/grafana/regexp v0.0.0-20240607082908-2cb410fa05da/go.mod h1:+JKpmjMGhpgPL+rXZ5nsZieVzvarn86asRlBg4uNGnk= +github.com/graph-gophers/graphql-go v1.9.0 h1:yu0ucKHLc5qGpRwLYKIWtr9bOoxovkWasuBrPQwlHls= +github.com/graph-gophers/graphql-go v1.9.0/go.mod h1:23olKZ7duEvHlF/2ELEoSZaY1aNPfShjP782SOoNTyM= github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 h1:UH//fgunKIs4JdUbpDl1VZCDaL56wXCB/5+wF6uHfaI= github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod h1:g5qyo/la0ALbONm6Vbp88Yd8NsDy6rZz+RcrMPxvld8= github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 h1:qnpSQwGEnkcRpTqNOIR6bJbR0gAorgP9CSALpRcKoAA= 
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1/go.mod h1:lXGCsh6c22WGtjr+qGHj1otzZpV/1kwTMAqkwZsnWRU= -github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0 h1:pRhl55Yx1eC7BZ1N+BBWwnKaMyD8uC+34TLdndZMAKk= -github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0/go.mod h1:XKMd7iuf/RGPSMJ/U4HP0zS2Z9Fh8Ps9a+6X26m/tmI= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c= +github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.2.0 h1:kQ0NI7W1B3HwiN5gAYtY+XFItDPbLBwYRxAqbFTyDes= +github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.2.0/go.mod h1:zrT2dxOAjNFPRGjTUe2Xmb4q4YdUwVvQFV6xiCSf+z0= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1 h1:VNqngBF40hVlDloBruUehVYC3ArSgIyScOAyMRqBxRg= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1/go.mod h1:RBRO7fro65R6tjKzYgLAFo0t1QEXY1Dp+i/bvpRiqiQ= github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k= github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= -github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU= -github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= -github.com/hashicorp/go-version v1.6.0 h1:feTTfFNnjP967rlCxM/I9g701jU+RN74YKx2mOkIeek= -github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= +github.com/hashicorp/go-retryablehttp v0.7.8 h1:ylXZWnqa7Lhqpk0L1P1LzDtGcCR0rPVUrx/c8Unxc48= +github.com/hashicorp/go-retryablehttp v0.7.8/go.mod h1:rjiScheydd+CxvumBsIrFKlx3iS0jrZ7LvzFGFmuKbw= +github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY= 
+github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hexops/autogold v0.8.1/go.mod h1:97HLDXyG23akzAoRYJh/2OBs3kd80eHyKPvZw0S5ZBY= github.com/hexops/autogold v1.3.1 h1:YgxF9OHWbEIUjhDbpnLhgVsjUDsiHDTyDfy2lrfdlzo= github.com/hexops/autogold v1.3.1/go.mod h1:sQO+mQUCVfxOKPht+ipDSkJ2SCJ7BNJVHZexsXqWMx4= @@ -235,8 +220,8 @@ github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4 github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= @@ -251,8 +236,9 @@ github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+ github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= -github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-colorable v0.1.14 
h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= +github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= @@ -274,10 +260,6 @@ github.com/nightlyone/lockfile v1.0.0 h1:RHep2cFKK4PonZJDdEl4GmkabuhbsRMgk/k3uAm github.com/nightlyone/lockfile v1.0.0/go.mod h1:rywoIealpdNse2r832aiD9jRk8ErCatROs6LzC841CI= github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= -github.com/opentracing-contrib/go-grpc v0.1.2 h1:MP16Ozc59kqqwn1v18aQxpeGZhsBanJ2iurZYaQSZ+g= -github.com/opentracing-contrib/go-grpc v0.1.2/go.mod h1:glU6rl1Fhfp9aXUHkE36K2mR4ht8vih0ekOVlWKEUHM= -github.com/opentracing-contrib/go-grpc/test v0.0.0-20260320052726-de6f1ccdd147 h1:MtzxRkCJU9aGlcL4pbcYcr9qSfcfOMkEIvJYiBIxLr4= -github.com/opentracing-contrib/go-grpc/test v0.0.0-20260320052726-de6f1ccdd147/go.mod h1:gvr3V5Pls/X/wRxvoNuGdDxrHUuTqHHUA1dZJmlAOuo= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= @@ -292,21 +274,20 @@ github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsK github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= 
-github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU= github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= -github.com/prometheus/client_golang v1.20.3 h1:oPksm4K8B+Vt35tUhw6GbSNSgVlVSBH0qELP/7u83l4= -github.com/prometheus/client_golang v1.20.3/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.59.1 h1:LXb1quJHWm1P6wq/U824uxYi4Sg0oGvNeUm1z5dJoX0= -github.com/prometheus/common v0.59.1/go.mod h1:GpWM7dewqmVYcd7SmRaiWVe9SSqjf0UrwnYnpEZNuT0= +github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io= +github.com/prometheus/common v0.62.0/go.mod 
h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= @@ -323,12 +304,12 @@ github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6Mwd github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8= github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY= -github.com/sourcegraph/go-ctags v0.0.0-20240424152308-4faeee4849da h1:hNVb44yWOr43HqizO4++mgA82tusAvxMjib1arIN0a0= -github.com/sourcegraph/go-ctags v0.0.0-20240424152308-4faeee4849da/go.mod h1:Or1cqbhDzkbH+hlwv5iW7uCTPEMKH9u/mTUh7otRQHY= -github.com/sourcegraph/log v0.0.0-20240515122000-be5f7eea69b1 h1:5QRKAfZ/qyFIvBFN4SEEnea955/CesDRh3mC53TmPLk= -github.com/sourcegraph/log v0.0.0-20240515122000-be5f7eea69b1/go.mod h1:IDp09QkoqS8Z3CyN2RW6vXjgABkNpDbyjLIHNQwQ8P8= -github.com/sourcegraph/mountinfo v0.0.0-20240201124957-b314c0befab1 h1:nBb4Cp27e5UH4Imc53cDcI+6WT6Hm5NR0TIguAqbjUI= -github.com/sourcegraph/mountinfo v0.0.0-20240201124957-b314c0befab1/go.mod h1:ghoEiutaNVERt2cu5q/bU3HOo29AHGSPrRZE1sOaA0w= +github.com/sourcegraph/go-ctags v0.0.0-20250729094530-349a251d78d8 h1:hpCD/FvbXLR7/034fKD0CQ8LmT4zoQfT2DzJIjqMzUI= +github.com/sourcegraph/go-ctags v0.0.0-20250729094530-349a251d78d8/go.mod h1:Or1cqbhDzkbH+hlwv5iW7uCTPEMKH9u/mTUh7otRQHY= +github.com/sourcegraph/log v0.0.0-20241024013702-574f7079c888 h1:9PUH8Hn8mVhPTtRKqot1HHsbLRDP0H2A+FSyuRumP2Q= +github.com/sourcegraph/log v0.0.0-20241024013702-574f7079c888/go.mod h1:IDp09QkoqS8Z3CyN2RW6vXjgABkNpDbyjLIHNQwQ8P8= +github.com/sourcegraph/mountinfo v0.0.0-20251117172727-e9f6a87f579c h1:6F/5RpKA1mga6IAHIYsiaCzQF4hzZEAifM08oO7BFoE= 
+github.com/sourcegraph/mountinfo v0.0.0-20251117172727-e9f6a87f579c/go.mod h1:ghoEiutaNVERt2cu5q/bU3HOo29AHGSPrRZE1sOaA0w= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= @@ -344,12 +325,16 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/tetratelabs/wazero v1.9.0 h1:IcZ56OuxrtaEz8UYNRHBrUa9bYeX9oVY93KspZZBf/I= +github.com/tetratelabs/wazero v1.9.0/go.mod h1:TSbcXCfFP0L2FGkRPxHphadXPjo1T6W+CseNNY7EkjM= github.com/uber/jaeger-client-go v2.30.0+incompatible h1:D6wyKGCecFaSRUpo8lCVbaOOb6ThwMmTEbhRwtKR97o= github.com/uber/jaeger-client-go v2.30.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= github.com/uber/jaeger-lib v2.4.1+incompatible h1:td4jdvLcExb4cBISKIpHuGoVXh+dVKhn2Um6rjCsSsg= github.com/uber/jaeger-lib v2.4.1+incompatible/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U= -github.com/xanzy/go-gitlab v0.109.0 h1:RcRme5w8VpLXTSTTMZdVoQWY37qTJWg+gwdQl4aAttE= -github.com/xanzy/go-gitlab v0.109.0/go.mod h1:wKNKh3GkYDMOsGmnfuX+ITCmDuSDWFO0G+C4AygL9RY= +github.com/wasilibs/go-re2 v1.10.0 h1:vQZEBYZOCA9jdBMmrO4+CvqyCj0x4OomXTJ4a5/urQ0= +github.com/wasilibs/go-re2 v1.10.0/go.mod h1:k+5XqO2bCJS+QpGOnqugyfwC04nw0jaglmjrrkG8U6o= +github.com/wasilibs/wazero-helpers v0.0.0-20240620070341-3dff1577cd52 h1:OvLBa8SqJnZ6P+mjlzc2K7PM22rRUPE1x32G9DTPrC4= +github.com/wasilibs/wazero-helpers v0.0.0-20240620070341-3dff1577cd52/go.mod h1:jMeV4Vpbi8osrE/pKUxRZkVaA0EX7NZN0A9/oRzgpgY= github.com/xanzy/ssh-agent v0.3.3 
h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM= github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= @@ -364,43 +349,45 @@ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= +gitlab.com/gitlab-org/api/client-go v1.46.0 h1:YxBWFZIFYKcGESCb9fpkwzouo+apyB9pr/XTWzNoL24= +gitlab.com/gitlab-org/api/client-go v1.46.0/go.mod h1:FtgyU6g2HS5+fMhw6nLK96GBEEBx5MzntOiJWfIaiN8= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.68.0 h1:0Qx7VGBacMm9ZENQ7TnNObTYI4ShC+lHI16seduaxZo= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.68.0/go.mod h1:Sje3i3MjSPKTSPvVWCaL8ugBzJwik3u4smCjUeuupqg= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 h1:YH4g8lQroajqUwWbq/tr2QX1JFmEXaDLgG+ew9bLMWo= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0/go.mod h1:fvPi2qXDqFs8M4B4fmJhE92TyQs9Ydjlg3RvfUp+NbQ= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8= -go.opentelemetry.io/contrib/propagators/jaeger v1.43.0 
h1:peiLMz1+aqJE+3L4mOVtR9wlmv+yh/JVYXCBjqmzJJE= -go.opentelemetry.io/contrib/propagators/jaeger v1.43.0/go.mod h1:Agvif+4A8p/3UtZzJ0MCcDEuQwgtrzM71DueU41DCs8= -go.opentelemetry.io/contrib/propagators/ot v1.43.0 h1:Hh1HahlGc81AOE7siqi1tVOlbanY/UxMMWedpb0d5oQ= -go.opentelemetry.io/contrib/propagators/ot v1.43.0/go.mod h1:58MlyS7lghzYvAm5LN9gGmZpCMQEMB5vpZp9SRgOyE4= -go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= -go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0= -go.opentelemetry.io/otel/bridge/opentracing v1.43.0 h1:rI9LWd0BPmaEZeTg/FFUUs5hnJNfQ+W7xAGaLgu+4mk= -go.opentelemetry.io/otel/bridge/opentracing v1.43.0/go.mod h1:AQoGTVOeWESXlMsmxq2CMJ8+jtKrXH78i4Po6L4f3hI= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 h1:88Y4s2C8oTui1LGM6bTWkw0ICGcOLCAI5l6zsD1j20k= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0/go.mod h1:Vl1/iaggsuRlrHf/hfPJPvVag77kKyvrLeD10kpMl+A= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0 h1:RAE+JPfvEmvy+0LzyUA25/SGawPwIUbZ6u0Wug54sLc= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0/go.mod h1:AGmbycVGEsRx9mXMZ75CsOyhSP6MFIcj/6dnG+vhVjk= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 h1:3iZJKlCZufyRzPzlQhUIWVmfltrXuGyfjREgGP3UUjc= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0/go.mod h1:/G+nUPfhq2e+qiXMGxMwumDrP5jtzU+mWN7/sjT2rak= -go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM= -go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY= -go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg= -go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg= -go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw= -go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod 
h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A= -go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A= -go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0= -go.opentelemetry.io/proto/otlp v1.10.0 h1:IQRWgT5srOCYfiWnpqUYz9CVmbO8bFmKcwYxpuCSL2g= -go.opentelemetry.io/proto/otlp v1.10.0/go.mod h1:/CV4QoCR/S9yaPj8utp3lvQPoqMtxXdzn7ozvvozVqk= +go.opentelemetry.io/contrib/propagators/jaeger v1.33.0 h1:Jok/dG8kfp+yod29XKYV/blWgYPlMuRUoRHljrXMF5E= +go.opentelemetry.io/contrib/propagators/jaeger v1.33.0/go.mod h1:ku/EpGk44S5lyVMbtJRK2KFOnXEehxf6SDnhu1eZmjA= +go.opentelemetry.io/contrib/propagators/ot v1.33.0 h1:xj/pQFKo4ROsx0v129KpLgFwaYMgFTu3dAMEEih97cY= +go.opentelemetry.io/contrib/propagators/ot v1.33.0/go.mod h1:/xxHCLhTmaypEFwMViRGROj2qgrGiFrkxIlATt0rddc= +go.opentelemetry.io/otel v1.42.0 h1:lSQGzTgVR3+sgJDAU/7/ZMjN9Z+vUip7leaqBKy4sho= +go.opentelemetry.io/otel v1.42.0/go.mod h1:lJNsdRMxCUIWuMlVJWzecSMuNjE7dOYyWlqOXWkdqCc= +go.opentelemetry.io/otel/bridge/opentracing v1.33.0 h1:eH88qvKdY7ns7Xu6WlJBQNOzZ3MVvBR6tEl2euaYS9w= +go.opentelemetry.io/otel/bridge/opentracing v1.33.0/go.mod h1:FNai/nhRSn/kHyv+V1zaf/30BU8hO/DXo0MvV0PaUS8= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 h1:Vh5HayB/0HHfOQA7Ctx69E/Y/DcQSMPpKANYVMQ7fBA= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0/go.mod h1:cpgtDBaqD/6ok/UG0jT15/uKjAY8mRA53diogHBg3UI= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 h1:5pojmb1U1AogINhN3SurB+zm/nIcusopeBNp42f45QM= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0/go.mod h1:57gTHJSE5S1tqg+EKsLPlTWhpHMsWlVmer+LA926XiA= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.33.0 h1:wpMfgF8E1rkrT1Z6meFh1NDtownE9Ii3n3X2GJYjsaU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.33.0/go.mod h1:wAy0T/dUbs468uOlkT31xjvqQgEVXv58BRFWEgn5v/0= +go.opentelemetry.io/otel/metric v1.42.0 
h1:2jXG+3oZLNXEPfNmnpxKDeZsFI5o4J+nz6xUlaFdF/4= +go.opentelemetry.io/otel/metric v1.42.0/go.mod h1:RlUN/7vTU7Ao/diDkEpQpnz3/92J9ko05BIwxYa2SSI= +go.opentelemetry.io/otel/sdk v1.42.0 h1:LyC8+jqk6UJwdrI/8VydAq/hvkFKNHZVIWuslJXYsDo= +go.opentelemetry.io/otel/sdk v1.42.0/go.mod h1:rGHCAxd9DAph0joO4W6OPwxjNTYWghRWmkHuGbayMts= +go.opentelemetry.io/otel/sdk/metric v1.42.0 h1:D/1QR46Clz6ajyZ3G8SgNlTJKBdGp84q9RKCAZ3YGuA= +go.opentelemetry.io/otel/sdk/metric v1.42.0/go.mod h1:Ua6AAlDKdZ7tdvaQKfSmnFTdHx37+J4ba8MwVCYM5hc= +go.opentelemetry.io/otel/trace v1.42.0 h1:OUCgIPt+mzOnaUTpOQcBiM/PLQ/Op7oq6g4LenLmOYY= +go.opentelemetry.io/otel/trace v1.42.0/go.mod h1:f3K9S+IFqnumBkKhRJMeaZeNk9epyhnCmQh/EysQCdc= +go.opentelemetry.io/proto/otlp v1.5.0 h1:xJvq7gMzB31/d406fB8U5CBdyQGw4P399D1aQWU/3i4= +go.opentelemetry.io/proto/otlp v1.5.0/go.mod h1:keN8WnHxOy8PG0rQZjJJ5A2ebUoafqWp0eVQ4yIXvJ4= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= -go.uber.org/automaxprocs v1.5.3 h1:kWazyxZUrS3Gs4qUpbwo5kEIMGe/DAvi5Z4tl2NW4j8= -go.uber.org/automaxprocs v1.5.3/go.mod h1:eRbA25aqJrxAbsLO0xy5jVwPt7FQnRgjW+efnwa1WM0= +go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= +go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= @@ -419,16 +406,16 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= 
golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= -golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= -golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA= +golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q= +golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= -golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= -golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= +golang.org/x/exp v0.0.0-20250813145105-42675adae3e6 h1:SbTAbRFnd5kjQXbczszQ0hdk3ctwYf3qBNH9jIsGclE= +golang.org/x/exp v0.0.0-20250813145105-42675adae3e6/go.mod h1:4QTo5u+SEIbbKW1RacMZq1YEfOBqeXa19JeshGi+zc4= golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= @@ -443,8 +430,8 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod 
h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8= -golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w= +golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk= +golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -454,7 +441,6 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= @@ -463,24 +449,23 @@ golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.23.0/go.mod 
h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= -golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= -golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= +golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= +golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ= -golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= +golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220819030929-7fc1605a5dde/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.20.0 
h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= -golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= +golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -511,8 +496,8 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= -golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= +golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA= @@ -520,8 +505,8 @@ golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= -golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU= -golang.org/x/term 
v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A= +golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU= +golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= @@ -530,10 +515,10 @@ golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= -golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= -golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= -golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= +golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= +golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= +golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -549,49 +534,38 @@ golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.4.0/go.mod 
h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k= -golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0= +golang.org/x/tools v0.39.0 h1:ik4ho21kwuQln40uelmciQPp9SipgNDdrafrYA4TmQQ= +golang.org/x/tools v0.39.0/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= -gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= -gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= -google.golang.org/api v0.196.0 h1:k/RafYqebaIJBO3+SMnfEGtFVlvp5vSgqTUF54UN/zg= -google.golang.org/api v0.196.0/go.mod h1:g9IL21uGkYgvQ5BZg6BAtoGJQIm8r6EgaAbpNey5wBE= +google.golang.org/api v0.217.0 h1:GYrUtD289o4zl1AhiTZL0jvQGa2RDLyC+kX1N/lfGOU= +google.golang.org/api v0.217.0/go.mod h1:qMc2E8cBAbQlRypBTBWHklNJlaZZJBwDv81B1Iu8oSI= google.golang.org/appengine v1.1.0/go.mod 
h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20240903143218-8af14fe29dc1 h1:BulPr26Jqjnd4eYDVe+YvyR7Yc2vJGkO5/0UxD0/jZU= -google.golang.org/genproto v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:hL97c3SYopEHblzpxRL4lSs523++l8DYxGM1FQiYmb4= -google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA= -google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260406210006-6f92a3bedf2d h1:wT2n40TBqFY6wiwazVK9/iTWbsQrgk5ZfCSVFLO9LQA= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260406210006-6f92a3bedf2d/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= +google.golang.org/genproto v0.0.0-20250115164207-1a7da9e5054f h1:387Y+JbxF52bmesc8kq1NyYIp33dnxCw6eiA7JMsTmw= +google.golang.org/genproto v0.0.0-20250115164207-1a7da9e5054f/go.mod h1:0joYwWwLQh18AOj8zMYeZLjzuqcYTU3/nC5JdCvC3JI= +google.golang.org/genproto/googleapis/api v0.0.0-20250811230008-5f3141c8851a h1:DMCgtIAIQGZqJXMVzJF4MV8BlWoJh2ZuFiRdAleyr58= +google.golang.org/genproto/googleapis/api v0.0.0-20250811230008-5f3141c8851a/go.mod h1:y2yVLIE/CSMCPXaHnSKXxu1spLPnglFLegmgdY23uuE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 h1:eaY8u2EuxbRv7c3NiGK0/NedzVsCcV6hDuU5qPX5EGE= 
+google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5/go.mod h1:M4/wBTSeyLxupu3W3tJtOgB14jILAS/XWPSSa3TAlJc= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= -google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM= -google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4= -google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= -google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= -google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= -google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= -google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= -google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/grpc v1.75.0 h1:+TW+dqTd2Biwe6KKfhE5JpiYIBWq865PhKGSXiivqt4= +google.golang.org/grpc v1.75.0/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= google.golang.org/protobuf v1.36.11 
h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -614,4 +588,6 @@ honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= mvdan.cc/gofumpt v0.4.0 h1:JVf4NN1mIpHogBj7ABpgOyZc65/UUOkKQFkoURsz4MM= mvdan.cc/gofumpt v0.4.0/go.mod h1:PljLOHDeZqgS8opHRKLzp2It2VBuSdteAgqUfzMTxlQ= +pgregory.net/rapid v1.2.0 h1:keKAYRcjm+e1F0oAuU5F5+YPAWcyxNNRK2wud503Gnk= +pgregory.net/rapid v1.2.0/go.mod h1:PY5XlDGj0+V1FCq0o192FdRhpKHGTRIWBgqjDBTrq04= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/grpc/defaults/server.go b/grpc/defaults/server.go new file mode 100644 index 000000000..714563d48 --- /dev/null +++ b/grpc/defaults/server.go @@ -0,0 +1,66 @@ +package defaults + +import ( + "sync" + + grpcprom "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus" + "github.com/prometheus/client_golang/prometheus" + sglog "github.com/sourcegraph/log" + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" + "google.golang.org/grpc" + "google.golang.org/grpc/reflection" + + "github.com/sourcegraph/zoekt/grpc/internalerrs" + "github.com/sourcegraph/zoekt/grpc/messagesize" + "github.com/sourcegraph/zoekt/grpc/propagator" + "github.com/sourcegraph/zoekt/internal/tenant" +) + +func NewServer(logger sglog.Logger, additionalOpts ...grpc.ServerOption) *grpc.Server { + metrics := serverMetricsOnce() + + opts := []grpc.ServerOption{ + grpc.StatsHandler(otelgrpc.NewServerHandler()), + grpc.ChainStreamInterceptor( + propagator.StreamServerPropagator(tenant.Propagator{}), + tenant.StreamServerInterceptor, + metrics.StreamServerInterceptor(), + messagesize.StreamServerInterceptor, + 
internalerrs.LoggingStreamServerInterceptor(logger), + ), + grpc.ChainUnaryInterceptor( + propagator.UnaryServerPropagator(tenant.Propagator{}), + tenant.UnaryServerInterceptor, + metrics.UnaryServerInterceptor(), + messagesize.UnaryServerInterceptor, + internalerrs.LoggingUnaryServerInterceptor(logger), + ), + } + + opts = append(opts, additionalOpts...) + + // Ensure that the message size options are set last, so they override any other + // server-specific options that tweak the message size. + // + // The message size options are only provided if the environment variable is set. These options serve as an escape hatch, so they + // take precedence over everything else with a uniform size setting that's easy to reason about. + opts = append(opts, messagesize.MustGetServerMessageSizeFromEnv()...) + + s := grpc.NewServer(opts...) + reflection.Register(s) + return s +} + +// serverMetricsOnce returns a singleton instance of the server metrics +// that are shared across all gRPC servers that this process creates. +// +// This function panics if the metrics cannot be registered with the default +// Prometheus registry. +var serverMetricsOnce = sync.OnceValue(func() *grpcprom.ServerMetrics { + serverMetrics := grpcprom.NewServerMetrics( + grpcprom.WithServerCounterOptions(), + grpcprom.WithServerHandlingTimeHistogram(), // record the overall response latency for a gRPC request + ) + prometheus.DefaultRegisterer.MustRegister(serverMetrics) + return serverMetrics +}) diff --git a/grpc/grpcutil/util.go b/grpc/grpcutil/util.go index 34f91e51c..bdb1fa4f1 100644 --- a/grpc/grpcutil/util.go +++ b/grpc/grpcutil/util.go @@ -1,6 +1,13 @@ package grpcutil -import "strings" +import ( + "net/http" + "strings" + + "golang.org/x/net/http2" + "golang.org/x/net/http2/h2c" + "google.golang.org/grpc" +) // SplitMethodName splits a full gRPC method name (e.g. 
"/package.service/method") in to its individual components (service, method) // @@ -12,3 +19,21 @@ func SplitMethodName(fullMethod string) (string, string) { } return "unknown", "unknown" } + +// MultiplexGRPC takes a gRPC server and a plain HTTP handler and multiplexes the +// request handling. Any requests that declare themselves as gRPC requests are routed +// to the gRPC server, all others are routed to the httpHandler. +func MultiplexGRPC(grpcServer *grpc.Server, httpHandler http.Handler) http.Handler { + newHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.ProtoMajor == 2 && strings.Contains(r.Header.Get("Content-Type"), "application/grpc") { + grpcServer.ServeHTTP(w, r) + } else { + httpHandler.ServeHTTP(w, r) + } + }) + + // Until we enable TLS, we need to fall back to the h2c protocol, which is + // basically HTTP2 without TLS. The standard library does not implement the + // h2c protocol, so this hijacks h2c requests and handles them correctly. + return h2c.NewHandler(newHandler, &http2.Server{}) +} diff --git a/grpc/internalerrs/common_test.go b/grpc/internalerrs/common_test.go index 79c3c9646..aef587a24 100644 --- a/grpc/internalerrs/common_test.go +++ b/grpc/internalerrs/common_test.go @@ -8,16 +8,15 @@ import ( "strings" "testing" - "github.com/google/go-cmp/cmp/cmpopts" - "google.golang.org/protobuf/proto" - "google.golang.org/protobuf/types/known/timestamppb" - - newspb "github.com/sourcegraph/zoekt/grpc/testprotos/news/v1" - "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/timestamppb" + + newsv1 "github.com/sourcegraph/zoekt/grpc/testprotos/news/v1" ) func TestCallBackClientStream(t *testing.T) { @@ -97,7 +96,7 @@ func TestRequestSavingClientStream_InitialRequest(t *testing.T) { } // Setup: create a sample proto.Message for the 
request - request := &newspb.BinaryAttachment{ + request := &newsv1.BinaryAttachment{ Name: "sample_request", Data: []byte("sample data"), } @@ -111,7 +110,7 @@ func TestRequestSavingClientStream_InitialRequest(t *testing.T) { } // Check: assert InitialRequest returns the request - if diff := cmp.Diff(request, *stream.InitialRequest(), cmpopts.IgnoreUnexported(newspb.BinaryAttachment{})); diff != "" { + if diff := cmp.Diff(request, *stream.InitialRequest(), cmpopts.IgnoreUnexported(newsv1.BinaryAttachment{})); diff != "" { t.Fatalf("InitialRequest() (-want +got):\n%s", diff) } } @@ -208,7 +207,7 @@ func TestRequestSavingServerStream_InitialRequest(t *testing.T) { } // Setup: create a sample proto.Message for the request - request := &newspb.BinaryAttachment{ + request := &newsv1.BinaryAttachment{ Name: "sample_request", Data: []byte("sample data"), } @@ -222,7 +221,7 @@ func TestRequestSavingServerStream_InitialRequest(t *testing.T) { } // Check: assert InitialRequest returns the request - if diff := cmp.Diff(request, *stream.InitialRequest(), cmpopts.IgnoreUnexported(newspb.BinaryAttachment{})); diff != "" { + if diff := cmp.Diff(request, *stream.InitialRequest(), cmpopts.IgnoreUnexported(newsv1.BinaryAttachment{})); diff != "" { t.Fatalf("InitialRequest() (-want +got):\n%s", diff) } } @@ -369,12 +368,12 @@ func TestGRPCUnexpectedContentTypeChecker(t *testing.T) { func TestFindNonUTF8StringFields(t *testing.T) { // Create instances of the BinaryAttachment and KeyValueAttachment messages - invalidBinaryAttachment := &newspb.BinaryAttachment{ + invalidBinaryAttachment := &newsv1.BinaryAttachment{ Name: "inval\x80id_binary", Data: []byte("sample data"), } - invalidKeyValueAttachment := &newspb.KeyValueAttachment{ + invalidKeyValueAttachment := &newsv1.KeyValueAttachment{ Name: "inval\x80id_key_value", Data: map[string]string{ "key1": "value1", @@ -383,15 +382,15 @@ func TestFindNonUTF8StringFields(t *testing.T) { } // Create a sample Article message with invalid 
UTF-8 strings - article := &newspb.Article{ + article := &newsv1.Article{ Author: "inval\x80id_author", Date: &timestamppb.Timestamp{Seconds: 1234567890}, Title: "valid_title", Content: "valid_content", - Status: newspb.Article_STATUS_PUBLISHED, - Attachments: []*newspb.Attachment{ - {Contents: &newspb.Attachment_BinaryAttachment{BinaryAttachment: invalidBinaryAttachment}}, - {Contents: &newspb.Attachment_KeyValueAttachment{KeyValueAttachment: invalidKeyValueAttachment}}, + Status: newsv1.Article_STATUS_PUBLISHED, + Attachments: []*newsv1.Attachment{ + {Contents: &newsv1.Attachment_BinaryAttachment{BinaryAttachment: invalidBinaryAttachment}}, + {Contents: &newsv1.Attachment_KeyValueAttachment{KeyValueAttachment: invalidKeyValueAttachment}}, }, } diff --git a/grpc/internalerrs/logging.go b/grpc/internalerrs/logging.go index 361582edd..669d056a8 100644 --- a/grpc/internalerrs/logging.go +++ b/grpc/internalerrs/logging.go @@ -8,14 +8,13 @@ import ( "strings" "github.com/dustin/go-humanize" - "github.com/sourcegraph/zoekt/grpc/grpcutil" - - "google.golang.org/grpc/codes" - "google.golang.org/protobuf/proto" - "github.com/sourcegraph/log" "google.golang.org/grpc" + "google.golang.org/grpc/codes" "google.golang.org/grpc/status" + "google.golang.org/protobuf/proto" + + "github.com/sourcegraph/zoekt/grpc/grpcutil" ) var ( diff --git a/grpc/internalerrs/prometheus.go b/grpc/internalerrs/prometheus.go index 5c9fb9551..1c500202a 100644 --- a/grpc/internalerrs/prometheus.go +++ b/grpc/internalerrs/prometheus.go @@ -7,9 +7,10 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/sourcegraph/zoekt/grpc/grpcutil" "google.golang.org/grpc" "google.golang.org/grpc/codes" + + "github.com/sourcegraph/zoekt/grpc/grpcutil" ) var metricGRPCMethodStatus = promauto.NewCounterVec(prometheus.CounterOpts{ diff --git a/grpc/messagesize/prometheus.go b/grpc/messagesize/prometheus.go index 881f8ffb3..bfa246981 100644 --- 
a/grpc/messagesize/prometheus.go +++ b/grpc/messagesize/prometheus.go @@ -7,9 +7,10 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/sourcegraph/zoekt/grpc/grpcutil" "google.golang.org/grpc" "google.golang.org/protobuf/proto" + + "github.com/sourcegraph/zoekt/grpc/grpcutil" ) var ( diff --git a/grpc/messagesize/prometheus_test.go b/grpc/messagesize/prometheus_test.go index 193d607cb..16dd58ead 100644 --- a/grpc/messagesize/prometheus_test.go +++ b/grpc/messagesize/prometheus_test.go @@ -317,7 +317,7 @@ func TestUnaryClientInterceptor(t *testing.T) { }{ { name: "invoker successful - observe request size", - invoker: func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, opts ...grpc.CallOption) error { + invoker: func(ctx context.Context, method string, req, reply any, cc *grpc.ClientConn, opts ...grpc.CallOption) error { return nil }, @@ -328,7 +328,7 @@ func TestUnaryClientInterceptor(t *testing.T) { { name: "invoker error - observe a zero-sized response", - invoker: func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, opts ...grpc.CallOption) error { + invoker: func(ctx context.Context, method string, req, reply any, cc *grpc.ClientConn, opts ...grpc.CallOption) error { return sentinelError }, @@ -356,7 +356,7 @@ func TestUnaryClientInterceptor(t *testing.T) { var actualRequest any invokerCalled := false - invoker := func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, opts ...grpc.CallOption) error { + invoker := func(ctx context.Context, method string, req, reply any, cc *grpc.ClientConn, opts ...grpc.CallOption) error { invokerCalled = true actualRequest = req diff --git a/grpc/propagator/propagator.go b/grpc/propagator/propagator.go index 28e773f72..dc7dbe9bf 100644 --- a/grpc/propagator/propagator.go +++ b/grpc/propagator/propagator.go @@ -27,7 +27,7 @@ type Propagator interface { 
// should be configured with an interceptor that uses the same propagator. func StreamServerPropagator(prop Propagator) grpc.StreamServerInterceptor { return func( - srv interface{}, + srv any, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler, @@ -52,10 +52,10 @@ func StreamServerPropagator(prop Propagator) grpc.StreamServerInterceptor { func UnaryServerPropagator(prop Propagator) grpc.UnaryServerInterceptor { return func( ctx context.Context, - req interface{}, + req any, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler, - ) (resp interface{}, err error) { + ) (resp any, err error) { md, ok := metadata.FromIncomingContext(ctx) if ok { ctx, err = prop.InjectContext(ctx, md) diff --git a/grpc/protos/README.md b/grpc/protos/README.md new file mode 100644 index 000000000..deb84386a --- /dev/null +++ b/grpc/protos/README.md @@ -0,0 +1,11 @@ +# Webserver protobuf definitions + +This directory contains protobuf definitions for the webserver gRPC API. + +To generate the Go code, run this script from the repository root: + +```sh +./gen-proto.sh +``` + +Note: this script will regenerate all protos in the project, not just the ones in this directory. 
diff --git a/grpc/protos/zoekt/webserver/v1/query.pb.go b/grpc/protos/zoekt/webserver/v1/query.pb.go index 2cd1694d6..8fc70afca 100644 --- a/grpc/protos/zoekt/webserver/v1/query.pb.go +++ b/grpc/protos/zoekt/webserver/v1/query.pb.go @@ -158,6 +158,7 @@ type Q struct { // *Q_Not // *Q_Branch // *Q_Boost + // *Q_Meta Query isQ_Query `protobuf_oneof:"query"` } @@ -326,6 +327,13 @@ func (x *Q) GetBoost() *Boost { return nil } +func (x *Q) GetMeta() *Meta { + if x, ok := x.GetQuery().(*Q_Meta); ok { + return x.Meta + } + return nil +} + type isQ_Query interface { isQ_Query() } @@ -402,6 +410,10 @@ type Q_Boost struct { Boost *Boost `protobuf:"bytes,18,opt,name=boost,proto3,oneof"` } +type Q_Meta struct { + Meta *Meta `protobuf:"bytes,19,opt,name=meta,proto3,oneof"` +} + func (*Q_RawConfig) isQ_Query() {} func (*Q_Regexp) isQ_Query() {} @@ -438,6 +450,8 @@ func (*Q_Branch) isQ_Query() {} func (*Q_Boost) isQ_Query() {} +func (*Q_Meta) isQ_Query() {} + // RawConfig filters repositories based on their encoded RawConfig map. type RawConfig struct { state protoimpl.MessageState @@ -1385,13 +1399,69 @@ func (x *Boost) GetBoost() float64 { return 0 } +// Meta allows filtering results by repo metadata. 
+type Meta struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Key string `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"` + Value string `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"` +} + +func (x *Meta) Reset() { + *x = Meta{} + if protoimpl.UnsafeEnabled { + mi := &file_zoekt_webserver_v1_query_proto_msgTypes[19] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Meta) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Meta) ProtoMessage() {} + +func (x *Meta) ProtoReflect() protoreflect.Message { + mi := &file_zoekt_webserver_v1_query_proto_msgTypes[19] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Meta.ProtoReflect.Descriptor instead. +func (*Meta) Descriptor() ([]byte, []int) { + return file_zoekt_webserver_v1_query_proto_rawDescGZIP(), []int{19} +} + +func (x *Meta) GetKey() string { + if x != nil { + return x.Key + } + return "" +} + +func (x *Meta) GetValue() string { + if x != nil { + return x.Value + } + return "" +} + var File_zoekt_webserver_v1_query_proto protoreflect.FileDescriptor var file_zoekt_webserver_v1_query_proto_rawDesc = []byte{ 0x0a, 0x1e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2f, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2f, 0x76, 0x31, 0x2f, 0x71, 0x75, 0x65, 0x72, 0x79, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x12, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, - 0x72, 0x2e, 0x76, 0x31, 0x22, 0xe2, 0x07, 0x0a, 0x01, 0x51, 0x12, 0x3e, 0x0a, 0x0a, 0x72, 0x61, + 0x72, 0x2e, 0x76, 0x31, 0x22, 0x92, 0x08, 0x0a, 0x01, 0x51, 0x12, 0x3e, 0x0a, 0x0a, 0x72, 0x61, 0x77, 0x5f, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 
0x1d, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x52, 0x61, 0x77, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x48, 0x00, 0x52, @@ -1453,6 +1523,9 @@ var file_zoekt_webserver_v1_query_proto_rawDesc = []byte{ 0x05, 0x62, 0x6f, 0x6f, 0x73, 0x74, 0x18, 0x12, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x42, 0x6f, 0x6f, 0x73, 0x74, 0x48, 0x00, 0x52, 0x05, 0x62, 0x6f, 0x6f, 0x73, 0x74, + 0x12, 0x2e, 0x0a, 0x04, 0x6d, 0x65, 0x74, 0x61, 0x18, 0x13, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, + 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, + 0x2e, 0x76, 0x31, 0x2e, 0x4d, 0x65, 0x74, 0x61, 0x48, 0x00, 0x52, 0x04, 0x6d, 0x65, 0x74, 0x61, 0x42, 0x07, 0x0a, 0x05, 0x71, 0x75, 0x65, 0x72, 0x79, 0x22, 0xef, 0x01, 0x0a, 0x09, 0x52, 0x61, 0x77, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x38, 0x0a, 0x05, 0x66, 0x6c, 0x61, 0x67, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0e, 0x32, 0x22, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, @@ -1546,7 +1619,10 @@ var file_zoekt_webserver_v1_query_proto_rawDesc = []byte{ 0x20, 0x01, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x51, 0x52, 0x05, 0x63, 0x68, 0x69, 0x6c, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x62, 0x6f, 0x6f, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x01, 0x52, 0x05, 0x62, 0x6f, 0x6f, 0x73, 0x74, 0x42, 0x3d, 0x5a, 0x3b, 0x67, 0x69, 0x74, 0x68, + 0x01, 0x52, 0x05, 0x62, 0x6f, 0x6f, 0x73, 0x74, 0x22, 0x2e, 0x0a, 0x04, 0x4d, 0x65, 0x74, 0x61, + 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, + 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x42, 0x3d, 0x5a, 0x3b, 0x67, 0x69, 
0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2f, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2f, 0x77, 0x65, 0x62, 0x73, 0x65, @@ -1566,7 +1642,7 @@ func file_zoekt_webserver_v1_query_proto_rawDescGZIP() []byte { } var file_zoekt_webserver_v1_query_proto_enumTypes = make([]protoimpl.EnumInfo, 2) -var file_zoekt_webserver_v1_query_proto_msgTypes = make([]protoimpl.MessageInfo, 20) +var file_zoekt_webserver_v1_query_proto_msgTypes = make([]protoimpl.MessageInfo, 21) var file_zoekt_webserver_v1_query_proto_goTypes = []interface{}{ (RawConfig_Flag)(0), // 0: zoekt.webserver.v1.RawConfig.Flag (Type_Kind)(0), // 1: zoekt.webserver.v1.Type.Kind @@ -1589,7 +1665,8 @@ var file_zoekt_webserver_v1_query_proto_goTypes = []interface{}{ (*Not)(nil), // 18: zoekt.webserver.v1.Not (*Branch)(nil), // 19: zoekt.webserver.v1.Branch (*Boost)(nil), // 20: zoekt.webserver.v1.Boost - nil, // 21: zoekt.webserver.v1.RepoSet.SetEntry + (*Meta)(nil), // 21: zoekt.webserver.v1.Meta + nil, // 22: zoekt.webserver.v1.RepoSet.SetEntry } var file_zoekt_webserver_v1_query_proto_depIdxs = []int32{ 3, // 0: zoekt.webserver.v1.Q.raw_config:type_name -> zoekt.webserver.v1.RawConfig @@ -1609,21 +1686,22 @@ var file_zoekt_webserver_v1_query_proto_depIdxs = []int32{ 18, // 14: zoekt.webserver.v1.Q.not:type_name -> zoekt.webserver.v1.Not 19, // 15: zoekt.webserver.v1.Q.branch:type_name -> zoekt.webserver.v1.Branch 20, // 16: zoekt.webserver.v1.Q.boost:type_name -> zoekt.webserver.v1.Boost - 0, // 17: zoekt.webserver.v1.RawConfig.flags:type_name -> zoekt.webserver.v1.RawConfig.Flag - 2, // 18: zoekt.webserver.v1.Symbol.expr:type_name -> zoekt.webserver.v1.Q - 10, // 19: zoekt.webserver.v1.BranchesRepos.list:type_name -> zoekt.webserver.v1.BranchRepos - 21, // 20: zoekt.webserver.v1.RepoSet.set:type_name -> 
zoekt.webserver.v1.RepoSet.SetEntry - 2, // 21: zoekt.webserver.v1.Type.child:type_name -> zoekt.webserver.v1.Q - 1, // 22: zoekt.webserver.v1.Type.type:type_name -> zoekt.webserver.v1.Type.Kind - 2, // 23: zoekt.webserver.v1.And.children:type_name -> zoekt.webserver.v1.Q - 2, // 24: zoekt.webserver.v1.Or.children:type_name -> zoekt.webserver.v1.Q - 2, // 25: zoekt.webserver.v1.Not.child:type_name -> zoekt.webserver.v1.Q - 2, // 26: zoekt.webserver.v1.Boost.child:type_name -> zoekt.webserver.v1.Q - 27, // [27:27] is the sub-list for method output_type - 27, // [27:27] is the sub-list for method input_type - 27, // [27:27] is the sub-list for extension type_name - 27, // [27:27] is the sub-list for extension extendee - 0, // [0:27] is the sub-list for field type_name + 21, // 17: zoekt.webserver.v1.Q.meta:type_name -> zoekt.webserver.v1.Meta + 0, // 18: zoekt.webserver.v1.RawConfig.flags:type_name -> zoekt.webserver.v1.RawConfig.Flag + 2, // 19: zoekt.webserver.v1.Symbol.expr:type_name -> zoekt.webserver.v1.Q + 10, // 20: zoekt.webserver.v1.BranchesRepos.list:type_name -> zoekt.webserver.v1.BranchRepos + 22, // 21: zoekt.webserver.v1.RepoSet.set:type_name -> zoekt.webserver.v1.RepoSet.SetEntry + 2, // 22: zoekt.webserver.v1.Type.child:type_name -> zoekt.webserver.v1.Q + 1, // 23: zoekt.webserver.v1.Type.type:type_name -> zoekt.webserver.v1.Type.Kind + 2, // 24: zoekt.webserver.v1.And.children:type_name -> zoekt.webserver.v1.Q + 2, // 25: zoekt.webserver.v1.Or.children:type_name -> zoekt.webserver.v1.Q + 2, // 26: zoekt.webserver.v1.Not.child:type_name -> zoekt.webserver.v1.Q + 2, // 27: zoekt.webserver.v1.Boost.child:type_name -> zoekt.webserver.v1.Q + 28, // [28:28] is the sub-list for method output_type + 28, // [28:28] is the sub-list for method input_type + 28, // [28:28] is the sub-list for extension type_name + 28, // [28:28] is the sub-list for extension extendee + 0, // [0:28] is the sub-list for field type_name } func init() { 
file_zoekt_webserver_v1_query_proto_init() } @@ -1860,6 +1938,18 @@ func file_zoekt_webserver_v1_query_proto_init() { return nil } } + file_zoekt_webserver_v1_query_proto_msgTypes[19].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Meta); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } } file_zoekt_webserver_v1_query_proto_msgTypes[0].OneofWrappers = []interface{}{ (*Q_RawConfig)(nil), @@ -1880,6 +1970,7 @@ func file_zoekt_webserver_v1_query_proto_init() { (*Q_Not)(nil), (*Q_Branch)(nil), (*Q_Boost)(nil), + (*Q_Meta)(nil), } type x struct{} out := protoimpl.TypeBuilder{ @@ -1887,7 +1978,7 @@ func file_zoekt_webserver_v1_query_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_zoekt_webserver_v1_query_proto_rawDesc, NumEnums: 2, - NumMessages: 20, + NumMessages: 21, NumExtensions: 0, NumServices: 0, }, diff --git a/grpc/protos/zoekt/webserver/v1/query.proto b/grpc/protos/zoekt/webserver/v1/query.proto index 308ad461e..99931212b 100644 --- a/grpc/protos/zoekt/webserver/v1/query.proto +++ b/grpc/protos/zoekt/webserver/v1/query.proto @@ -24,6 +24,7 @@ message Q { Not not = 16; Branch branch = 17; Boost boost = 18; + Meta meta = 19; } } @@ -148,3 +149,9 @@ message Boost { Q child = 1; double boost = 2; } + +// Meta allows filtering results by repo metadata. +message Meta { + string key = 1; + string value = 2; +} diff --git a/grpc/protos/zoekt/webserver/v1/webserver.pb.go b/grpc/protos/zoekt/webserver/v1/webserver.pb.go index fb4511bc6..2c4b47b88 100644 --- a/grpc/protos/zoekt/webserver/v1/webserver.pb.go +++ b/grpc/protos/zoekt/webserver/v1/webserver.pb.go @@ -813,6 +813,8 @@ type Repository struct { FileTombstones []string `protobuf:"bytes,17,rep,name=file_tombstones,json=fileTombstones,proto3" json:"file_tombstones,omitempty"` // tenant_id is the tenant ID of the repository. 
TenantId int64 `protobuf:"varint,18,opt,name=tenant_id,json=tenantId,proto3" json:"tenant_id,omitempty"` + // Additional metadata about the repository. + Metadata map[string]string `protobuf:"bytes,19,rep,name=metadata,proto3" json:"metadata,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` } func (x *Repository) Reset() { @@ -973,6 +975,13 @@ func (x *Repository) GetTenantId() int64 { return 0 } +func (x *Repository) GetMetadata() map[string]string { + if x != nil { + return x.Metadata + } + return nil +} + type IndexMetadata struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -2430,7 +2439,7 @@ var file_zoekt_webserver_v1_webserver_proto_rawDesc = []byte{ 0x74, 0x61, 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x52, 0x65, 0x70, 0x6f, 0x53, 0x74, 0x61, 0x74, 0x73, - 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x73, 0x22, 0x8f, 0x07, 0x0a, 0x0a, 0x52, 0x65, 0x70, 0x6f, + 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x73, 0x22, 0x96, 0x08, 0x0a, 0x0a, 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, @@ -2477,288 +2486,297 @@ var file_zoekt_webserver_v1_webserver_proto_rawDesc = []byte{ 0x6f, 0x6d, 0x62, 0x73, 0x74, 0x6f, 0x6e, 0x65, 0x73, 0x18, 0x11, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0e, 0x66, 0x69, 0x6c, 0x65, 0x54, 0x6f, 0x6d, 0x62, 0x73, 0x74, 0x6f, 0x6e, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x12, 0x20, 0x01, - 0x28, 0x03, 0x52, 0x08, 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x74, 0x49, 0x64, 0x1a, 0x5d, 0x0a, 0x0f, - 0x53, 0x75, 
0x62, 0x52, 0x65, 0x70, 0x6f, 0x4d, 0x61, 0x70, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, - 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, - 0x79, 0x12, 0x34, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, - 0x32, 0x1e, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, - 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, - 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x3c, 0x0a, 0x0e, 0x52, - 0x61, 0x77, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, - 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, - 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xd6, 0x03, 0x0a, 0x0d, 0x49, 0x6e, - 0x64, 0x65, 0x78, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x30, 0x0a, 0x14, 0x69, - 0x6e, 0x64, 0x65, 0x78, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x73, - 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x12, 0x69, 0x6e, 0x64, 0x65, 0x78, - 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x32, 0x0a, - 0x15, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5f, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x5f, 0x76, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x13, 0x69, 0x6e, - 0x64, 0x65, 0x78, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, - 0x6e, 0x12, 0x37, 0x0a, 0x18, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5f, 0x6d, 0x69, 0x6e, 0x5f, 0x72, - 0x65, 0x61, 0x64, 0x65, 0x72, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x03, 0x52, 0x15, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x4d, 0x69, 0x6e, 0x52, 0x65, 0x61, - 0x64, 0x65, 0x72, 0x56, 0x65, 0x72, 0x73, 0x69, 
0x6f, 0x6e, 0x12, 0x39, 0x0a, 0x0a, 0x69, 0x6e, - 0x64, 0x65, 0x78, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, - 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, - 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x09, 0x69, 0x6e, 0x64, 0x65, - 0x78, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x1f, 0x0a, 0x0b, 0x70, 0x6c, 0x61, 0x69, 0x6e, 0x5f, 0x61, - 0x73, 0x63, 0x69, 0x69, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x70, 0x6c, 0x61, 0x69, - 0x6e, 0x41, 0x73, 0x63, 0x69, 0x69, 0x12, 0x55, 0x0a, 0x0c, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, - 0x67, 0x65, 0x5f, 0x6d, 0x61, 0x70, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x32, 0x2e, 0x7a, - 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, - 0x31, 0x2e, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, - 0x4c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x4d, 0x61, 0x70, 0x45, 0x6e, 0x74, 0x72, 0x79, - 0x52, 0x0b, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x4d, 0x61, 0x70, 0x12, 0x23, 0x0a, - 0x0d, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x07, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, - 0x6f, 0x6e, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, - 0x69, 0x64, 0x1a, 0x3e, 0x0a, 0x10, 0x4c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x4d, 0x61, - 0x70, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, - 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, - 0x38, 0x01, 0x22, 0xa1, 0x01, 0x0a, 0x14, 0x4d, 0x69, 0x6e, 0x69, 0x6d, 0x61, 0x6c, 0x52, 0x65, - 0x70, 0x6f, 0x4c, 0x69, 0x73, 0x74, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x1f, 0x0a, 
0x0b, 0x68, - 0x61, 0x73, 0x5f, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, - 0x52, 0x0a, 0x68, 0x61, 0x73, 0x53, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73, 0x12, 0x40, 0x0a, 0x08, - 0x62, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, + 0x28, 0x03, 0x52, 0x08, 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x74, 0x49, 0x64, 0x12, 0x48, 0x0a, 0x08, + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x13, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2c, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, - 0x2e, 0x76, 0x31, 0x2e, 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x42, 0x72, - 0x61, 0x6e, 0x63, 0x68, 0x52, 0x08, 0x62, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x65, 0x73, 0x12, 0x26, - 0x0a, 0x0f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x5f, 0x75, 0x6e, 0x69, - 0x78, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0d, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x54, 0x69, - 0x6d, 0x65, 0x55, 0x6e, 0x69, 0x78, 0x22, 0x40, 0x0a, 0x10, 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, - 0x74, 0x6f, 0x72, 0x79, 0x42, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, - 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x18, - 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0xcd, 0x02, 0x0a, 0x09, 0x52, 0x65, 0x70, - 0x6f, 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x72, 0x65, 0x70, 0x6f, 0x73, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, 0x72, 0x65, 0x70, 0x6f, 0x73, 0x12, 0x16, 0x0a, 0x06, - 0x73, 0x68, 0x61, 0x72, 0x64, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x06, 0x73, 0x68, - 0x61, 0x72, 0x64, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, - 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, - 0x74, 0x73, 0x12, 0x1f, 
0x0a, 0x0b, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5f, 0x62, 0x79, 0x74, 0x65, - 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0a, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x42, 0x79, - 0x74, 0x65, 0x73, 0x12, 0x23, 0x0a, 0x0d, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x5f, 0x62, - 0x79, 0x74, 0x65, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0c, 0x63, 0x6f, 0x6e, 0x74, - 0x65, 0x6e, 0x74, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x6e, 0x65, 0x77, 0x5f, - 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, - 0x04, 0x52, 0x0d, 0x6e, 0x65, 0x77, 0x4c, 0x69, 0x6e, 0x65, 0x73, 0x43, 0x6f, 0x75, 0x6e, 0x74, - 0x12, 0x42, 0x0a, 0x1e, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x5f, 0x62, 0x72, 0x61, 0x6e, - 0x63, 0x68, 0x5f, 0x6e, 0x65, 0x77, 0x5f, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x5f, 0x63, 0x6f, 0x75, - 0x6e, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x04, 0x52, 0x1a, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, - 0x74, 0x42, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x4e, 0x65, 0x77, 0x4c, 0x69, 0x6e, 0x65, 0x73, 0x43, - 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x42, 0x0a, 0x1e, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x5f, 0x62, 0x72, - 0x61, 0x6e, 0x63, 0x68, 0x65, 0x73, 0x5f, 0x6e, 0x65, 0x77, 0x5f, 0x6c, 0x69, 0x6e, 0x65, 0x73, - 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x04, 0x52, 0x1a, 0x6f, 0x74, - 0x68, 0x65, 0x72, 0x42, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x65, 0x73, 0x4e, 0x65, 0x77, 0x4c, 0x69, - 0x6e, 0x65, 0x73, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x22, 0xa9, 0x07, 0x0a, 0x05, 0x53, 0x74, 0x61, - 0x74, 0x73, 0x12, 0x30, 0x0a, 0x14, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x5f, 0x62, 0x79, - 0x74, 0x65, 0x73, 0x5f, 0x6c, 0x6f, 0x61, 0x64, 0x65, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, - 0x52, 0x12, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x42, 0x79, 0x74, 0x65, 0x73, 0x4c, 0x6f, - 0x61, 0x64, 0x65, 0x64, 0x12, 0x2c, 0x0a, 0x12, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5f, 0x62, 0x79, - 0x74, 0x65, 0x73, 0x5f, 0x6c, 0x6f, 0x61, 0x64, 0x65, 0x64, 
0x18, 0x02, 0x20, 0x01, 0x28, 0x03, - 0x52, 0x10, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x42, 0x79, 0x74, 0x65, 0x73, 0x4c, 0x6f, 0x61, 0x64, - 0x65, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x63, 0x72, 0x61, 0x73, 0x68, 0x65, 0x73, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x03, 0x52, 0x07, 0x63, 0x72, 0x61, 0x73, 0x68, 0x65, 0x73, 0x12, 0x35, 0x0a, 0x08, - 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, - 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, - 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x08, 0x64, 0x75, 0x72, 0x61, 0x74, - 0x69, 0x6f, 0x6e, 0x12, 0x1d, 0x0a, 0x0a, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x63, 0x6f, 0x75, 0x6e, - 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x66, 0x69, 0x6c, 0x65, 0x43, 0x6f, 0x75, - 0x6e, 0x74, 0x12, 0x34, 0x0a, 0x16, 0x73, 0x68, 0x61, 0x72, 0x64, 0x5f, 0x66, 0x69, 0x6c, 0x65, - 0x73, 0x5f, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x64, 0x65, 0x72, 0x65, 0x64, 0x18, 0x06, 0x20, 0x01, - 0x28, 0x03, 0x52, 0x14, 0x73, 0x68, 0x61, 0x72, 0x64, 0x46, 0x69, 0x6c, 0x65, 0x73, 0x43, 0x6f, - 0x6e, 0x73, 0x69, 0x64, 0x65, 0x72, 0x65, 0x64, 0x12, 0x29, 0x0a, 0x10, 0x66, 0x69, 0x6c, 0x65, - 0x73, 0x5f, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x64, 0x65, 0x72, 0x65, 0x64, 0x18, 0x07, 0x20, 0x01, - 0x28, 0x03, 0x52, 0x0f, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x43, 0x6f, 0x6e, 0x73, 0x69, 0x64, 0x65, - 0x72, 0x65, 0x64, 0x12, 0x21, 0x0a, 0x0c, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x5f, 0x6c, 0x6f, 0x61, - 0x64, 0x65, 0x64, 0x18, 0x08, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0b, 0x66, 0x69, 0x6c, 0x65, 0x73, - 0x4c, 0x6f, 0x61, 0x64, 0x65, 0x64, 0x12, 0x23, 0x0a, 0x0d, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x5f, - 0x73, 0x6b, 0x69, 0x70, 0x70, 0x65, 0x64, 0x18, 0x09, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0c, 0x66, - 0x69, 0x6c, 0x65, 0x73, 0x53, 0x6b, 0x69, 0x70, 0x70, 0x65, 0x64, 0x12, 0x25, 0x0a, 0x0e, 0x73, - 0x68, 0x61, 0x72, 0x64, 0x73, 0x5f, 0x73, 0x63, 0x61, 0x6e, 0x6e, 0x65, 0x64, 0x18, 0x0a, 0x20, - 
0x01, 0x28, 0x03, 0x52, 0x0d, 0x73, 0x68, 0x61, 0x72, 0x64, 0x73, 0x53, 0x63, 0x61, 0x6e, 0x6e, - 0x65, 0x64, 0x12, 0x25, 0x0a, 0x0e, 0x73, 0x68, 0x61, 0x72, 0x64, 0x73, 0x5f, 0x73, 0x6b, 0x69, - 0x70, 0x70, 0x65, 0x64, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0d, 0x73, 0x68, 0x61, 0x72, - 0x64, 0x73, 0x53, 0x6b, 0x69, 0x70, 0x70, 0x65, 0x64, 0x12, 0x32, 0x0a, 0x15, 0x73, 0x68, 0x61, - 0x72, 0x64, 0x73, 0x5f, 0x73, 0x6b, 0x69, 0x70, 0x70, 0x65, 0x64, 0x5f, 0x66, 0x69, 0x6c, 0x74, - 0x65, 0x72, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x03, 0x52, 0x13, 0x73, 0x68, 0x61, 0x72, 0x64, 0x73, - 0x53, 0x6b, 0x69, 0x70, 0x70, 0x65, 0x64, 0x46, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x12, 0x1f, 0x0a, - 0x0b, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x0d, 0x20, 0x01, - 0x28, 0x03, 0x52, 0x0a, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x23, - 0x0a, 0x0d, 0x6e, 0x67, 0x72, 0x61, 0x6d, 0x5f, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x18, - 0x0e, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0c, 0x6e, 0x67, 0x72, 0x61, 0x6d, 0x4d, 0x61, 0x74, 0x63, - 0x68, 0x65, 0x73, 0x12, 0x2d, 0x0a, 0x04, 0x77, 0x61, 0x69, 0x74, 0x18, 0x0f, 0x20, 0x01, 0x28, - 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x04, 0x77, 0x61, - 0x69, 0x74, 0x12, 0x51, 0x0a, 0x17, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x5f, 0x74, 0x72, 0x65, 0x65, - 0x5f, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x13, 0x20, - 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x15, - 0x6d, 0x61, 0x74, 0x63, 0x68, 0x54, 0x72, 0x65, 0x65, 0x43, 0x6f, 0x6e, 0x73, 0x74, 0x72, 0x75, - 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x45, 0x0a, 0x11, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x5f, 0x74, - 0x72, 0x65, 0x65, 0x5f, 0x73, 0x65, 
0x61, 0x72, 0x63, 0x68, 0x18, 0x14, 0x20, 0x01, 0x28, 0x0b, - 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, - 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0f, 0x6d, 0x61, 0x74, - 0x63, 0x68, 0x54, 0x72, 0x65, 0x65, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x12, 0x2d, 0x0a, 0x12, - 0x72, 0x65, 0x67, 0x65, 0x78, 0x70, 0x73, 0x5f, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x64, 0x65, 0x72, - 0x65, 0x64, 0x18, 0x10, 0x20, 0x01, 0x28, 0x03, 0x52, 0x11, 0x72, 0x65, 0x67, 0x65, 0x78, 0x70, - 0x73, 0x43, 0x6f, 0x6e, 0x73, 0x69, 0x64, 0x65, 0x72, 0x65, 0x64, 0x12, 0x42, 0x0a, 0x0c, 0x66, - 0x6c, 0x75, 0x73, 0x68, 0x5f, 0x72, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x18, 0x11, 0x20, 0x01, 0x28, - 0x0e, 0x32, 0x1f, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, - 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x46, 0x6c, 0x75, 0x73, 0x68, 0x52, 0x65, 0x61, 0x73, - 0x6f, 0x6e, 0x52, 0x0b, 0x66, 0x6c, 0x75, 0x73, 0x68, 0x52, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x12, - 0x23, 0x0a, 0x0d, 0x6e, 0x67, 0x72, 0x61, 0x6d, 0x5f, 0x6c, 0x6f, 0x6f, 0x6b, 0x75, 0x70, 0x73, - 0x18, 0x12, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0c, 0x6e, 0x67, 0x72, 0x61, 0x6d, 0x4c, 0x6f, 0x6f, - 0x6b, 0x75, 0x70, 0x73, 0x22, 0x58, 0x0a, 0x08, 0x50, 0x72, 0x6f, 0x67, 0x72, 0x65, 0x73, 0x73, - 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x69, 0x6f, 0x72, 0x69, 0x74, 0x79, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x01, 0x52, 0x08, 0x70, 0x72, 0x69, 0x6f, 0x72, 0x69, 0x74, 0x79, 0x12, 0x30, 0x0a, 0x14, - 0x6d, 0x61, 0x78, 0x5f, 0x70, 0x65, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x5f, 0x70, 0x72, 0x69, 0x6f, - 0x72, 0x69, 0x74, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x01, 0x52, 0x12, 0x6d, 0x61, 0x78, 0x50, - 0x65, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x50, 0x72, 0x69, 0x6f, 0x72, 0x69, 0x74, 0x79, 0x22, 0xb9, - 0x04, 0x0a, 0x09, 0x46, 0x69, 0x6c, 0x65, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x12, 0x14, 0x0a, 0x05, - 0x73, 0x63, 0x6f, 0x72, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x01, 0x52, 
0x05, 0x73, 0x63, 0x6f, - 0x72, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x64, 0x65, 0x62, 0x75, 0x67, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x05, 0x64, 0x65, 0x62, 0x75, 0x67, 0x12, 0x1b, 0x0a, 0x09, 0x66, 0x69, 0x6c, 0x65, - 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x08, 0x66, 0x69, 0x6c, - 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x72, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, - 0x6f, 0x72, 0x79, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x72, 0x65, 0x70, 0x6f, 0x73, - 0x69, 0x74, 0x6f, 0x72, 0x79, 0x12, 0x1a, 0x0a, 0x08, 0x62, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x65, - 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x62, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x65, - 0x73, 0x12, 0x40, 0x0a, 0x0c, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, - 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, - 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x4c, 0x69, 0x6e, - 0x65, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52, 0x0b, 0x6c, 0x69, 0x6e, 0x65, 0x4d, 0x61, 0x74, 0x63, - 0x68, 0x65, 0x73, 0x12, 0x43, 0x0a, 0x0d, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x5f, 0x6d, 0x61, 0x74, - 0x63, 0x68, 0x65, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x7a, 0x6f, 0x65, + 0x2e, 0x76, 0x31, 0x2e, 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x2e, 0x4d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x1a, 0x5d, 0x0a, 0x0f, 0x53, 0x75, 0x62, 0x52, 0x65, 0x70, + 0x6f, 0x4d, 0x61, 0x70, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x34, 0x0a, 0x05, 0x76, + 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, - 0x43, 0x68, 
0x75, 0x6e, 0x6b, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52, 0x0c, 0x63, 0x68, 0x75, 0x6e, - 0x6b, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x12, 0x23, 0x0a, 0x0d, 0x72, 0x65, 0x70, 0x6f, - 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x5f, 0x69, 0x64, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0d, 0x52, - 0x0c, 0x72, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x49, 0x64, 0x12, 0x2f, 0x0a, - 0x13, 0x72, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x5f, 0x70, 0x72, 0x69, 0x6f, - 0x72, 0x69, 0x74, 0x79, 0x18, 0x09, 0x20, 0x01, 0x28, 0x01, 0x52, 0x12, 0x72, 0x65, 0x70, 0x6f, - 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x50, 0x72, 0x69, 0x6f, 0x72, 0x69, 0x74, 0x79, 0x12, 0x18, - 0x0a, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x0c, 0x52, - 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x63, 0x68, 0x65, 0x63, - 0x6b, 0x73, 0x75, 0x6d, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x08, 0x63, 0x68, 0x65, 0x63, - 0x6b, 0x73, 0x75, 0x6d, 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, - 0x18, 0x0c, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, - 0x12, 0x2e, 0x0a, 0x13, 0x73, 0x75, 0x62, 0x5f, 0x72, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, - 0x72, 0x79, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x73, - 0x75, 0x62, 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x4e, 0x61, 0x6d, 0x65, - 0x12, 0x2e, 0x0a, 0x13, 0x73, 0x75, 0x62, 0x5f, 0x72, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, - 0x72, 0x79, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x73, - 0x75, 0x62, 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x50, 0x61, 0x74, 0x68, - 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x0f, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0xca, 0x02, 0x0a, 0x09, 0x4c, - 0x69, 0x6e, 0x65, 0x4d, 0x61, 0x74, 0x63, 0x68, 
0x12, 0x12, 0x0a, 0x04, 0x6c, 0x69, 0x6e, 0x65, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x6c, 0x69, 0x6e, 0x65, 0x12, 0x1d, 0x0a, 0x0a, - 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, - 0x52, 0x09, 0x6c, 0x69, 0x6e, 0x65, 0x53, 0x74, 0x61, 0x72, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x6c, - 0x69, 0x6e, 0x65, 0x5f, 0x65, 0x6e, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x07, 0x6c, - 0x69, 0x6e, 0x65, 0x45, 0x6e, 0x64, 0x12, 0x1f, 0x0a, 0x0b, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x6e, - 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0a, 0x6c, 0x69, 0x6e, - 0x65, 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x16, 0x0a, 0x06, 0x62, 0x65, 0x66, 0x6f, 0x72, - 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x62, 0x65, 0x66, 0x6f, 0x72, 0x65, 0x12, - 0x14, 0x0a, 0x05, 0x61, 0x66, 0x74, 0x65, 0x72, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x05, - 0x61, 0x66, 0x74, 0x65, 0x72, 0x12, 0x1b, 0x0a, 0x09, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x6e, 0x61, - 0x6d, 0x65, 0x18, 0x07, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x66, 0x69, 0x6c, 0x65, 0x4e, 0x61, - 0x6d, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x18, 0x08, 0x20, 0x01, 0x28, - 0x01, 0x52, 0x05, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x12, 0x1f, 0x0a, 0x0b, 0x64, 0x65, 0x62, 0x75, - 0x67, 0x5f, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x64, - 0x65, 0x62, 0x75, 0x67, 0x53, 0x63, 0x6f, 0x72, 0x65, 0x12, 0x4c, 0x0a, 0x0e, 0x6c, 0x69, 0x6e, - 0x65, 0x5f, 0x66, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x0a, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x25, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, - 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x4c, 0x69, 0x6e, 0x65, 0x46, 0x72, 0x61, 0x67, 0x6d, - 0x65, 0x6e, 0x74, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52, 0x0d, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x72, - 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x22, 0xc5, 0x01, 0x0a, 0x11, 0x4c, 0x69, 
0x6e, 0x65, - 0x46, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x12, 0x1f, 0x0a, - 0x0b, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x03, 0x52, 0x0a, 0x6c, 0x69, 0x6e, 0x65, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x12, 0x16, - 0x0a, 0x06, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, - 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x5f, - 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0b, 0x6d, 0x61, - 0x74, 0x63, 0x68, 0x4c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x44, 0x0a, 0x0b, 0x73, 0x79, 0x6d, - 0x62, 0x6f, 0x6c, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, + 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x3c, 0x0a, 0x0e, 0x52, 0x61, 0x77, 0x43, 0x6f, 0x6e, 0x66, + 0x69, 0x67, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, + 0x02, 0x38, 0x01, 0x1a, 0x3b, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, + 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, + 0x22, 0xd6, 0x03, 0x0a, 0x0d, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x12, 0x30, 0x0a, 0x14, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5f, 0x66, 0x6f, 0x72, 0x6d, + 0x61, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, + 0x52, 0x12, 0x69, 0x6e, 
0x64, 0x65, 0x78, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x56, 0x65, 0x72, + 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x32, 0x0a, 0x15, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5f, 0x66, 0x65, + 0x61, 0x74, 0x75, 0x72, 0x65, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x03, 0x52, 0x13, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, + 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x37, 0x0a, 0x18, 0x69, 0x6e, 0x64, 0x65, + 0x78, 0x5f, 0x6d, 0x69, 0x6e, 0x5f, 0x72, 0x65, 0x61, 0x64, 0x65, 0x72, 0x5f, 0x76, 0x65, 0x72, + 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x15, 0x69, 0x6e, 0x64, 0x65, + 0x78, 0x4d, 0x69, 0x6e, 0x52, 0x65, 0x61, 0x64, 0x65, 0x72, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, + 0x6e, 0x12, 0x39, 0x0a, 0x0a, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, + 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, + 0x70, 0x52, 0x09, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x1f, 0x0a, 0x0b, + 0x70, 0x6c, 0x61, 0x69, 0x6e, 0x5f, 0x61, 0x73, 0x63, 0x69, 0x69, 0x18, 0x05, 0x20, 0x01, 0x28, + 0x08, 0x52, 0x0a, 0x70, 0x6c, 0x61, 0x69, 0x6e, 0x41, 0x73, 0x63, 0x69, 0x69, 0x12, 0x55, 0x0a, + 0x0c, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x5f, 0x6d, 0x61, 0x70, 0x18, 0x06, 0x20, + 0x03, 0x28, 0x0b, 0x32, 0x32, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, + 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x4d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x4d, + 0x61, 0x70, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0b, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, + 0x65, 0x4d, 0x61, 0x70, 0x12, 0x23, 0x0a, 0x0d, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x5f, 0x76, 0x65, + 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x07, 0x20, 0x01, 0x28, 
0x09, 0x52, 0x0c, 0x7a, 0x6f, 0x65, + 0x6b, 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, + 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x1a, 0x3e, 0x0a, 0x10, 0x4c, 0x61, 0x6e, + 0x67, 0x75, 0x61, 0x67, 0x65, 0x4d, 0x61, 0x70, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, + 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, + 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, + 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xa1, 0x01, 0x0a, 0x14, 0x4d, 0x69, + 0x6e, 0x69, 0x6d, 0x61, 0x6c, 0x52, 0x65, 0x70, 0x6f, 0x4c, 0x69, 0x73, 0x74, 0x45, 0x6e, 0x74, + 0x72, 0x79, 0x12, 0x1f, 0x0a, 0x0b, 0x68, 0x61, 0x73, 0x5f, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, + 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x68, 0x61, 0x73, 0x53, 0x79, 0x6d, 0x62, + 0x6f, 0x6c, 0x73, 0x12, 0x40, 0x0a, 0x08, 0x62, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x65, 0x73, 0x18, + 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, + 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x52, 0x65, 0x70, 0x6f, 0x73, + 0x69, 0x74, 0x6f, 0x72, 0x79, 0x42, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x52, 0x08, 0x62, 0x72, 0x61, + 0x6e, 0x63, 0x68, 0x65, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5f, 0x74, + 0x69, 0x6d, 0x65, 0x5f, 0x75, 0x6e, 0x69, 0x78, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0d, + 0x69, 0x6e, 0x64, 0x65, 0x78, 0x54, 0x69, 0x6d, 0x65, 0x55, 0x6e, 0x69, 0x78, 0x22, 0x40, 0x0a, + 0x10, 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x42, 0x72, 0x61, 0x6e, 0x63, + 0x68, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, + 
0xcd, 0x02, 0x0a, 0x09, 0x52, 0x65, 0x70, 0x6f, 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x14, 0x0a, + 0x05, 0x72, 0x65, 0x70, 0x6f, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, 0x72, 0x65, + 0x70, 0x6f, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x68, 0x61, 0x72, 0x64, 0x73, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x03, 0x52, 0x06, 0x73, 0x68, 0x61, 0x72, 0x64, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x64, + 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, + 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x1f, 0x0a, 0x0b, 0x69, 0x6e, 0x64, + 0x65, 0x78, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0a, + 0x69, 0x6e, 0x64, 0x65, 0x78, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 0x23, 0x0a, 0x0d, 0x63, 0x6f, + 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x0c, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, + 0x26, 0x0a, 0x0f, 0x6e, 0x65, 0x77, 0x5f, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x5f, 0x63, 0x6f, 0x75, + 0x6e, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0d, 0x6e, 0x65, 0x77, 0x4c, 0x69, 0x6e, + 0x65, 0x73, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x42, 0x0a, 0x1e, 0x64, 0x65, 0x66, 0x61, 0x75, + 0x6c, 0x74, 0x5f, 0x62, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x5f, 0x6e, 0x65, 0x77, 0x5f, 0x6c, 0x69, + 0x6e, 0x65, 0x73, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x04, 0x52, + 0x1a, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x42, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x4e, 0x65, + 0x77, 0x4c, 0x69, 0x6e, 0x65, 0x73, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x42, 0x0a, 0x1e, 0x6f, + 0x74, 0x68, 0x65, 0x72, 0x5f, 0x62, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x65, 0x73, 0x5f, 0x6e, 0x65, + 0x77, 0x5f, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x08, 0x20, + 0x01, 0x28, 0x04, 0x52, 0x1a, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x42, 0x72, 0x61, 0x6e, 0x63, 0x68, + 0x65, 0x73, 0x4e, 0x65, 0x77, 0x4c, 
0x69, 0x6e, 0x65, 0x73, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x22, + 0xa9, 0x07, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x30, 0x0a, 0x14, 0x63, 0x6f, 0x6e, + 0x74, 0x65, 0x6e, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5f, 0x6c, 0x6f, 0x61, 0x64, 0x65, + 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x12, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, + 0x42, 0x79, 0x74, 0x65, 0x73, 0x4c, 0x6f, 0x61, 0x64, 0x65, 0x64, 0x12, 0x2c, 0x0a, 0x12, 0x69, + 0x6e, 0x64, 0x65, 0x78, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5f, 0x6c, 0x6f, 0x61, 0x64, 0x65, + 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x10, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x42, 0x79, + 0x74, 0x65, 0x73, 0x4c, 0x6f, 0x61, 0x64, 0x65, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x63, 0x72, 0x61, + 0x73, 0x68, 0x65, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x07, 0x63, 0x72, 0x61, 0x73, + 0x68, 0x65, 0x73, 0x12, 0x35, 0x0a, 0x08, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, + 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x52, 0x08, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1d, 0x0a, 0x0a, 0x66, 0x69, + 0x6c, 0x65, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, + 0x66, 0x69, 0x6c, 0x65, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x34, 0x0a, 0x16, 0x73, 0x68, 0x61, + 0x72, 0x64, 0x5f, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x5f, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x64, 0x65, + 0x72, 0x65, 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x03, 0x52, 0x14, 0x73, 0x68, 0x61, 0x72, 0x64, + 0x46, 0x69, 0x6c, 0x65, 0x73, 0x43, 0x6f, 0x6e, 0x73, 0x69, 0x64, 0x65, 0x72, 0x65, 0x64, 0x12, + 0x29, 0x0a, 0x10, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x5f, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x64, 0x65, + 0x72, 0x65, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0f, 0x66, 0x69, 0x6c, 0x65, 0x73, + 0x43, 0x6f, 0x6e, 0x73, 0x69, 0x64, 0x65, 0x72, 0x65, 0x64, 0x12, 0x21, 
0x0a, 0x0c, 0x66, 0x69, + 0x6c, 0x65, 0x73, 0x5f, 0x6c, 0x6f, 0x61, 0x64, 0x65, 0x64, 0x18, 0x08, 0x20, 0x01, 0x28, 0x03, + 0x52, 0x0b, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x4c, 0x6f, 0x61, 0x64, 0x65, 0x64, 0x12, 0x23, 0x0a, + 0x0d, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x5f, 0x73, 0x6b, 0x69, 0x70, 0x70, 0x65, 0x64, 0x18, 0x09, + 0x20, 0x01, 0x28, 0x03, 0x52, 0x0c, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x53, 0x6b, 0x69, 0x70, 0x70, + 0x65, 0x64, 0x12, 0x25, 0x0a, 0x0e, 0x73, 0x68, 0x61, 0x72, 0x64, 0x73, 0x5f, 0x73, 0x63, 0x61, + 0x6e, 0x6e, 0x65, 0x64, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0d, 0x73, 0x68, 0x61, 0x72, + 0x64, 0x73, 0x53, 0x63, 0x61, 0x6e, 0x6e, 0x65, 0x64, 0x12, 0x25, 0x0a, 0x0e, 0x73, 0x68, 0x61, + 0x72, 0x64, 0x73, 0x5f, 0x73, 0x6b, 0x69, 0x70, 0x70, 0x65, 0x64, 0x18, 0x0b, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x0d, 0x73, 0x68, 0x61, 0x72, 0x64, 0x73, 0x53, 0x6b, 0x69, 0x70, 0x70, 0x65, 0x64, + 0x12, 0x32, 0x0a, 0x15, 0x73, 0x68, 0x61, 0x72, 0x64, 0x73, 0x5f, 0x73, 0x6b, 0x69, 0x70, 0x70, + 0x65, 0x64, 0x5f, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x03, 0x52, + 0x13, 0x73, 0x68, 0x61, 0x72, 0x64, 0x73, 0x53, 0x6b, 0x69, 0x70, 0x70, 0x65, 0x64, 0x46, 0x69, + 0x6c, 0x74, 0x65, 0x72, 0x12, 0x1f, 0x0a, 0x0b, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x5f, 0x63, 0x6f, + 0x75, 0x6e, 0x74, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0a, 0x6d, 0x61, 0x74, 0x63, 0x68, + 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x6e, 0x67, 0x72, 0x61, 0x6d, 0x5f, 0x6d, + 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0c, 0x6e, 0x67, + 0x72, 0x61, 0x6d, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x12, 0x2d, 0x0a, 0x04, 0x77, 0x61, + 0x69, 0x74, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, + 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x52, 0x04, 0x77, 0x61, 0x69, 0x74, 0x12, 0x51, 0x0a, 0x17, 0x6d, 0x61, 0x74, + 0x63, 0x68, 
0x5f, 0x74, 0x72, 0x65, 0x65, 0x5f, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x72, 0x75, 0x63, + 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x13, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, + 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x15, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x54, 0x72, 0x65, 0x65, + 0x43, 0x6f, 0x6e, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x45, 0x0a, 0x11, + 0x6d, 0x61, 0x74, 0x63, 0x68, 0x5f, 0x74, 0x72, 0x65, 0x65, 0x5f, 0x73, 0x65, 0x61, 0x72, 0x63, + 0x68, 0x18, 0x14, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x52, 0x0f, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x54, 0x72, 0x65, 0x65, 0x53, 0x65, 0x61, + 0x72, 0x63, 0x68, 0x12, 0x2d, 0x0a, 0x12, 0x72, 0x65, 0x67, 0x65, 0x78, 0x70, 0x73, 0x5f, 0x63, + 0x6f, 0x6e, 0x73, 0x69, 0x64, 0x65, 0x72, 0x65, 0x64, 0x18, 0x10, 0x20, 0x01, 0x28, 0x03, 0x52, + 0x11, 0x72, 0x65, 0x67, 0x65, 0x78, 0x70, 0x73, 0x43, 0x6f, 0x6e, 0x73, 0x69, 0x64, 0x65, 0x72, + 0x65, 0x64, 0x12, 0x42, 0x0a, 0x0c, 0x66, 0x6c, 0x75, 0x73, 0x68, 0x5f, 0x72, 0x65, 0x61, 0x73, + 0x6f, 0x6e, 0x18, 0x11, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1f, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, + 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x46, 0x6c, + 0x75, 0x73, 0x68, 0x52, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x52, 0x0b, 0x66, 0x6c, 0x75, 0x73, 0x68, + 0x52, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x12, 0x23, 0x0a, 0x0d, 0x6e, 0x67, 0x72, 0x61, 0x6d, 0x5f, + 0x6c, 0x6f, 0x6f, 0x6b, 0x75, 0x70, 0x73, 0x18, 0x12, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0c, 0x6e, + 0x67, 0x72, 0x61, 0x6d, 0x4c, 0x6f, 0x6f, 0x6b, 0x75, 0x70, 0x73, 0x22, 0x58, 0x0a, 0x08, 0x50, + 0x72, 0x6f, 0x67, 0x72, 0x65, 0x73, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x69, 0x6f, 0x72, + 0x69, 0x74, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 
0x01, 0x52, 0x08, 0x70, 0x72, 0x69, 0x6f, 0x72, + 0x69, 0x74, 0x79, 0x12, 0x30, 0x0a, 0x14, 0x6d, 0x61, 0x78, 0x5f, 0x70, 0x65, 0x6e, 0x64, 0x69, + 0x6e, 0x67, 0x5f, 0x70, 0x72, 0x69, 0x6f, 0x72, 0x69, 0x74, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x01, 0x52, 0x12, 0x6d, 0x61, 0x78, 0x50, 0x65, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x50, 0x72, 0x69, + 0x6f, 0x72, 0x69, 0x74, 0x79, 0x22, 0xb9, 0x04, 0x0a, 0x09, 0x46, 0x69, 0x6c, 0x65, 0x4d, 0x61, + 0x74, 0x63, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x01, 0x52, 0x05, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x64, 0x65, 0x62, + 0x75, 0x67, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x64, 0x65, 0x62, 0x75, 0x67, 0x12, + 0x1b, 0x0a, 0x09, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x0c, 0x52, 0x08, 0x66, 0x69, 0x6c, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x1e, 0x0a, 0x0a, + 0x72, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x0a, 0x72, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x12, 0x1a, 0x0a, 0x08, + 0x62, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, + 0x62, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x65, 0x73, 0x12, 0x40, 0x0a, 0x0c, 0x6c, 0x69, 0x6e, 0x65, + 0x5f, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, - 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x48, 0x00, - 0x52, 0x0a, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x88, 0x01, 0x01, 0x42, - 0x0e, 0x0a, 0x0c, 0x5f, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x22, - 0x6b, 0x0a, 0x0a, 0x53, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x10, 0x0a, - 0x03, 0x73, 0x79, 0x6d, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x6d, 
0x12, - 0x12, 0x0a, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6b, - 0x69, 0x6e, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x06, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x70, - 0x61, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x6b, 0x69, 0x6e, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0a, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x4b, 0x69, 0x6e, 0x64, 0x22, 0xd9, 0x02, 0x0a, - 0x0a, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x12, 0x18, 0x0a, 0x07, 0x63, - 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x63, 0x6f, - 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x12, 0x41, 0x0a, 0x0d, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, - 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x7a, - 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, - 0x31, 0x2e, 0x4c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0c, 0x63, 0x6f, 0x6e, 0x74, - 0x65, 0x6e, 0x74, 0x53, 0x74, 0x61, 0x72, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x66, 0x69, 0x6c, 0x65, - 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x66, 0x69, 0x6c, - 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x31, 0x0a, 0x06, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x73, 0x18, - 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, - 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x52, 0x61, 0x6e, 0x67, 0x65, - 0x52, 0x06, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x73, 0x12, 0x3f, 0x0a, 0x0b, 0x73, 0x79, 0x6d, 0x62, - 0x6f, 0x6c, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1e, 0x2e, - 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, - 0x76, 0x31, 0x2e, 0x53, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x0a, 0x73, - 0x79, 0x6d, 0x62, 0x6f, 0x6c, 
0x49, 0x6e, 0x66, 0x6f, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x63, 0x6f, - 0x72, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x01, 0x52, 0x05, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x12, - 0x1f, 0x0a, 0x0b, 0x64, 0x65, 0x62, 0x75, 0x67, 0x5f, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x18, 0x07, + 0x2e, 0x76, 0x31, 0x2e, 0x4c, 0x69, 0x6e, 0x65, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52, 0x0b, 0x6c, + 0x69, 0x6e, 0x65, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x12, 0x43, 0x0a, 0x0d, 0x63, 0x68, + 0x75, 0x6e, 0x6b, 0x5f, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x1e, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, + 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x4d, 0x61, 0x74, 0x63, + 0x68, 0x52, 0x0c, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x12, + 0x23, 0x0a, 0x0d, 0x72, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x5f, 0x69, 0x64, + 0x18, 0x08, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x72, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, + 0x72, 0x79, 0x49, 0x64, 0x12, 0x2f, 0x0a, 0x13, 0x72, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, + 0x72, 0x79, 0x5f, 0x70, 0x72, 0x69, 0x6f, 0x72, 0x69, 0x74, 0x79, 0x18, 0x09, 0x20, 0x01, 0x28, + 0x01, 0x52, 0x12, 0x72, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x50, 0x72, 0x69, + 0x6f, 0x72, 0x69, 0x74, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, + 0x18, 0x0a, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x12, + 0x1a, 0x0a, 0x08, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x73, 0x75, 0x6d, 0x18, 0x0b, 0x20, 0x01, 0x28, + 0x0c, 0x52, 0x08, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x73, 0x75, 0x6d, 0x12, 0x1a, 0x0a, 0x08, 0x6c, + 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, + 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x12, 0x2e, 0x0a, 0x13, 0x73, 0x75, 0x62, 0x5f, 0x72, + 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x5f, 0x6e, 
0x61, 0x6d, 0x65, 0x18, 0x0d, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x73, 0x75, 0x62, 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, + 0x6f, 0x72, 0x79, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x2e, 0x0a, 0x13, 0x73, 0x75, 0x62, 0x5f, 0x72, + 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x6f, 0x72, 0x79, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x18, 0x0e, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x73, 0x75, 0x62, 0x52, 0x65, 0x70, 0x6f, 0x73, 0x69, 0x74, + 0x6f, 0x72, 0x79, 0x50, 0x61, 0x74, 0x68, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, + 0x6e, 0x22, 0xca, 0x02, 0x0a, 0x09, 0x4c, 0x69, 0x6e, 0x65, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x12, + 0x12, 0x0a, 0x04, 0x6c, 0x69, 0x6e, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x6c, + 0x69, 0x6e, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x73, 0x74, 0x61, 0x72, + 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x6c, 0x69, 0x6e, 0x65, 0x53, 0x74, 0x61, + 0x72, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x65, 0x6e, 0x64, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x03, 0x52, 0x07, 0x6c, 0x69, 0x6e, 0x65, 0x45, 0x6e, 0x64, 0x12, 0x1f, 0x0a, + 0x0b, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x04, 0x20, 0x01, + 0x28, 0x03, 0x52, 0x0a, 0x6c, 0x69, 0x6e, 0x65, 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x16, + 0x0a, 0x06, 0x62, 0x65, 0x66, 0x6f, 0x72, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, + 0x62, 0x65, 0x66, 0x6f, 0x72, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x61, 0x66, 0x74, 0x65, 0x72, 0x18, + 0x06, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x05, 0x61, 0x66, 0x74, 0x65, 0x72, 0x12, 0x1b, 0x0a, 0x09, + 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x07, 0x20, 0x01, 0x28, 0x08, 0x52, + 0x08, 0x66, 0x69, 0x6c, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x63, 0x6f, + 0x72, 0x65, 0x18, 0x08, 0x20, 0x01, 0x28, 0x01, 0x52, 0x05, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x12, + 0x1f, 
0x0a, 0x0b, 0x64, 0x65, 0x62, 0x75, 0x67, 0x5f, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x64, 0x65, 0x62, 0x75, 0x67, 0x53, 0x63, 0x6f, 0x72, 0x65, - 0x12, 0x26, 0x0a, 0x0f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x6d, 0x61, - 0x74, 0x63, 0x68, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0d, 0x62, 0x65, 0x73, 0x74, 0x4c, - 0x69, 0x6e, 0x65, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x22, 0x6b, 0x0a, 0x05, 0x52, 0x61, 0x6e, 0x67, - 0x65, 0x12, 0x32, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, - 0x32, 0x1c, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, - 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x4c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x05, - 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x2e, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, - 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x4c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x52, 0x03, 0x65, 0x6e, 0x64, 0x22, 0x64, 0x0a, 0x08, 0x4c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, - 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x62, 0x79, 0x74, 0x65, 0x4f, 0x66, 0x66, 0x73, - 0x65, 0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x6e, 0x75, 0x6d, 0x62, 0x65, - 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x6c, 0x69, 0x6e, 0x65, 0x4e, 0x75, 0x6d, - 0x62, 0x65, 0x72, 0x12, 0x16, 0x0a, 0x06, 0x63, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x0d, 0x52, 0x06, 0x63, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x2a, 0x8c, 0x01, 0x0a, 0x0b, - 0x46, 0x6c, 0x75, 0x73, 0x68, 0x52, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x20, 0x46, - 0x4c, 0x55, 0x53, 0x48, 0x5f, 0x52, 0x45, 0x41, 0x53, 0x4f, 0x4e, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, - 0x4f, 0x57, 0x4e, 0x5f, 0x55, 0x4e, 0x53, 0x50, 
0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, - 0x00, 0x12, 0x1e, 0x0a, 0x1a, 0x46, 0x4c, 0x55, 0x53, 0x48, 0x5f, 0x52, 0x45, 0x41, 0x53, 0x4f, - 0x4e, 0x5f, 0x54, 0x49, 0x4d, 0x45, 0x52, 0x5f, 0x45, 0x58, 0x50, 0x49, 0x52, 0x45, 0x44, 0x10, - 0x01, 0x12, 0x1c, 0x0a, 0x18, 0x46, 0x4c, 0x55, 0x53, 0x48, 0x5f, 0x52, 0x45, 0x41, 0x53, 0x4f, - 0x4e, 0x5f, 0x46, 0x49, 0x4e, 0x41, 0x4c, 0x5f, 0x46, 0x4c, 0x55, 0x53, 0x48, 0x10, 0x02, 0x12, - 0x19, 0x0a, 0x15, 0x46, 0x4c, 0x55, 0x53, 0x48, 0x5f, 0x52, 0x45, 0x41, 0x53, 0x4f, 0x4e, 0x5f, - 0x4d, 0x41, 0x58, 0x5f, 0x53, 0x49, 0x5a, 0x45, 0x10, 0x03, 0x32, 0x99, 0x02, 0x0a, 0x10, 0x57, - 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, - 0x51, 0x0a, 0x06, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x12, 0x21, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, - 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x53, - 0x65, 0x61, 0x72, 0x63, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x22, 0x2e, 0x7a, + 0x12, 0x4c, 0x0a, 0x0e, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x66, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, + 0x74, 0x73, 0x18, 0x0a, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, + 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x4c, 0x69, + 0x6e, 0x65, 0x46, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52, + 0x0d, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x22, 0xc5, + 0x01, 0x0a, 0x11, 0x4c, 0x69, 0x6e, 0x65, 0x46, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x4d, + 0x61, 0x74, 0x63, 0x68, 0x12, 0x1f, 0x0a, 0x0b, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x6f, 0x66, 0x66, + 0x73, 0x65, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0a, 0x6c, 0x69, 0x6e, 0x65, 0x4f, + 0x66, 0x66, 0x73, 0x65, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x12, 
0x21, 0x0a, + 0x0c, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x5f, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x03, 0x52, 0x0b, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x4c, 0x65, 0x6e, 0x67, 0x74, 0x68, + 0x12, 0x44, 0x0a, 0x0b, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x18, + 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, + 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x79, 0x6d, 0x62, 0x6f, + 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x48, 0x00, 0x52, 0x0a, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x49, + 0x6e, 0x66, 0x6f, 0x88, 0x01, 0x01, 0x42, 0x0e, 0x0a, 0x0c, 0x5f, 0x73, 0x79, 0x6d, 0x62, 0x6f, + 0x6c, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x22, 0x6b, 0x0a, 0x0a, 0x53, 0x79, 0x6d, 0x62, 0x6f, 0x6c, + 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x6d, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x73, 0x79, 0x6d, 0x12, 0x12, 0x0a, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x70, 0x61, + 0x72, 0x65, 0x6e, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x70, 0x61, 0x72, 0x65, + 0x6e, 0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x6b, 0x69, 0x6e, + 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x4b, + 0x69, 0x6e, 0x64, 0x22, 0xd9, 0x02, 0x0a, 0x0a, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x4d, 0x61, 0x74, + 0x63, 0x68, 0x12, 0x18, 0x0a, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x0c, 0x52, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x12, 0x41, 0x0a, 0x0d, + 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, + 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x4c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, + 0x6e, 0x52, 0x0c, 0x63, 
0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x53, 0x74, 0x61, 0x72, 0x74, 0x12, + 0x1b, 0x0a, 0x09, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x08, 0x52, 0x08, 0x66, 0x69, 0x6c, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x31, 0x0a, 0x06, + 0x72, 0x61, 0x6e, 0x67, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, - 0x31, 0x2e, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x22, 0x00, 0x12, 0x65, 0x0a, 0x0c, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x53, 0x65, 0x61, 0x72, - 0x63, 0x68, 0x12, 0x27, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, + 0x31, 0x2e, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x52, 0x06, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x73, 0x12, + 0x3f, 0x0a, 0x0b, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x18, 0x05, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, + 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x79, 0x6d, 0x62, 0x6f, 0x6c, + 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x0a, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x49, 0x6e, 0x66, 0x6f, + 0x12, 0x14, 0x0a, 0x05, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x01, 0x52, + 0x05, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x12, 0x1f, 0x0a, 0x0b, 0x64, 0x65, 0x62, 0x75, 0x67, 0x5f, + 0x73, 0x63, 0x6f, 0x72, 0x65, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x64, 0x65, 0x62, + 0x75, 0x67, 0x53, 0x63, 0x6f, 0x72, 0x65, 0x12, 0x26, 0x0a, 0x0f, 0x62, 0x65, 0x73, 0x74, 0x5f, + 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0d, + 0x52, 0x0d, 0x62, 0x65, 0x73, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x22, + 0x6b, 0x0a, 0x05, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x12, 0x32, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, + 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x7a, 
0x6f, 0x65, 0x6b, 0x74, 0x2e, + 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x4c, 0x6f, 0x63, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x2e, 0x0a, 0x03, + 0x65, 0x6e, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, + 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x4c, + 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x22, 0x64, 0x0a, 0x08, + 0x4c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x62, 0x79, 0x74, 0x65, + 0x5f, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x62, + 0x79, 0x74, 0x65, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x6c, 0x69, 0x6e, + 0x65, 0x5f, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, + 0x6c, 0x69, 0x6e, 0x65, 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x16, 0x0a, 0x06, 0x63, 0x6f, + 0x6c, 0x75, 0x6d, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x63, 0x6f, 0x6c, 0x75, + 0x6d, 0x6e, 0x2a, 0x8c, 0x01, 0x0a, 0x0b, 0x46, 0x6c, 0x75, 0x73, 0x68, 0x52, 0x65, 0x61, 0x73, + 0x6f, 0x6e, 0x12, 0x24, 0x0a, 0x20, 0x46, 0x4c, 0x55, 0x53, 0x48, 0x5f, 0x52, 0x45, 0x41, 0x53, + 0x4f, 0x4e, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, + 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x1e, 0x0a, 0x1a, 0x46, 0x4c, 0x55, 0x53, + 0x48, 0x5f, 0x52, 0x45, 0x41, 0x53, 0x4f, 0x4e, 0x5f, 0x54, 0x49, 0x4d, 0x45, 0x52, 0x5f, 0x45, + 0x58, 0x50, 0x49, 0x52, 0x45, 0x44, 0x10, 0x01, 0x12, 0x1c, 0x0a, 0x18, 0x46, 0x4c, 0x55, 0x53, + 0x48, 0x5f, 0x52, 0x45, 0x41, 0x53, 0x4f, 0x4e, 0x5f, 0x46, 0x49, 0x4e, 0x41, 0x4c, 0x5f, 0x46, + 0x4c, 0x55, 0x53, 0x48, 0x10, 0x02, 0x12, 0x19, 0x0a, 0x15, 0x46, 0x4c, 0x55, 0x53, 0x48, 0x5f, + 0x52, 0x45, 0x41, 0x53, 0x4f, 0x4e, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x53, 0x49, 0x5a, 0x45, 0x10, + 0x03, 
0x32, 0x99, 0x02, 0x0a, 0x10, 0x57, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x53, + 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x51, 0x0a, 0x06, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, + 0x12, 0x21, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, + 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x1a, 0x22, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, + 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x52, + 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x65, 0x0a, 0x0c, 0x53, 0x74, 0x72, + 0x65, 0x61, 0x6d, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x12, 0x27, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, + 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x53, + 0x74, 0x72, 0x65, 0x61, 0x6d, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x1a, 0x28, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x53, 0x65, - 0x61, 0x72, 0x63, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x28, 0x2e, 0x7a, 0x6f, - 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, - 0x2e, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x30, 0x01, 0x12, 0x4b, 0x0a, 0x04, 0x4c, 0x69, 0x73, - 0x74, 0x12, 0x1f, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, - 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, - 0x73, 0x74, 0x1a, 0x20, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, - 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x52, 0x65, 0x73, 0x70, - 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x3d, 
0x5a, 0x3b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, - 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, - 0x2f, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x73, 0x2f, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2f, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, - 0x65, 0x72, 0x2f, 0x76, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x61, 0x72, 0x63, 0x68, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x30, 0x01, + 0x12, 0x4b, 0x0a, 0x04, 0x4c, 0x69, 0x73, 0x74, 0x12, 0x1f, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, 0x74, + 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x4c, 0x69, + 0x73, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x20, 0x2e, 0x7a, 0x6f, 0x65, 0x6b, + 0x74, 0x2e, 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x4c, + 0x69, 0x73, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x3d, 0x5a, + 0x3b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x73, 0x6f, 0x75, 0x72, + 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x2f, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2f, 0x67, 0x72, + 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x7a, 0x6f, 0x65, 0x6b, 0x74, 0x2f, + 0x77, 0x65, 0x62, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2f, 0x76, 0x31, 0x62, 0x06, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -2774,7 +2792,7 @@ func file_zoekt_webserver_v1_webserver_proto_rawDescGZIP() []byte { } var file_zoekt_webserver_v1_webserver_proto_enumTypes = make([]protoimpl.EnumInfo, 2) -var file_zoekt_webserver_v1_webserver_proto_msgTypes = make([]protoimpl.MessageInfo, 27) +var file_zoekt_webserver_v1_webserver_proto_msgTypes = make([]protoimpl.MessageInfo, 28) var file_zoekt_webserver_v1_webserver_proto_goTypes = []interface{}{ (FlushReason)(0), // 0: zoekt.webserver.v1.FlushReason (ListOptions_RepoListField)(0), // 1: 
zoekt.webserver.v1.ListOptions.RepoListField @@ -2804,22 +2822,23 @@ var file_zoekt_webserver_v1_webserver_proto_goTypes = []interface{}{ nil, // 25: zoekt.webserver.v1.ListResponse.ReposMapEntry nil, // 26: zoekt.webserver.v1.Repository.SubRepoMapEntry nil, // 27: zoekt.webserver.v1.Repository.RawConfigEntry - nil, // 28: zoekt.webserver.v1.IndexMetadata.LanguageMapEntry - (*Q)(nil), // 29: zoekt.webserver.v1.Q - (*durationpb.Duration)(nil), // 30: google.protobuf.Duration - (*timestamppb.Timestamp)(nil), // 31: google.protobuf.Timestamp + nil, // 28: zoekt.webserver.v1.Repository.MetadataEntry + nil, // 29: zoekt.webserver.v1.IndexMetadata.LanguageMapEntry + (*Q)(nil), // 30: zoekt.webserver.v1.Q + (*durationpb.Duration)(nil), // 31: google.protobuf.Duration + (*timestamppb.Timestamp)(nil), // 32: google.protobuf.Timestamp } var file_zoekt_webserver_v1_webserver_proto_depIdxs = []int32{ - 29, // 0: zoekt.webserver.v1.SearchRequest.query:type_name -> zoekt.webserver.v1.Q + 30, // 0: zoekt.webserver.v1.SearchRequest.query:type_name -> zoekt.webserver.v1.Q 6, // 1: zoekt.webserver.v1.SearchRequest.opts:type_name -> zoekt.webserver.v1.SearchOptions 16, // 2: zoekt.webserver.v1.SearchResponse.stats:type_name -> zoekt.webserver.v1.Stats 17, // 3: zoekt.webserver.v1.SearchResponse.progress:type_name -> zoekt.webserver.v1.Progress 18, // 4: zoekt.webserver.v1.SearchResponse.files:type_name -> zoekt.webserver.v1.FileMatch 2, // 5: zoekt.webserver.v1.StreamSearchRequest.request:type_name -> zoekt.webserver.v1.SearchRequest 3, // 6: zoekt.webserver.v1.StreamSearchResponse.response_chunk:type_name -> zoekt.webserver.v1.SearchResponse - 30, // 7: zoekt.webserver.v1.SearchOptions.max_wall_time:type_name -> google.protobuf.Duration - 30, // 8: zoekt.webserver.v1.SearchOptions.flush_wall_time:type_name -> google.protobuf.Duration - 29, // 9: zoekt.webserver.v1.ListRequest.query:type_name -> zoekt.webserver.v1.Q + 31, // 7: zoekt.webserver.v1.SearchOptions.max_wall_time:type_name 
-> google.protobuf.Duration + 31, // 8: zoekt.webserver.v1.SearchOptions.flush_wall_time:type_name -> google.protobuf.Duration + 30, // 9: zoekt.webserver.v1.ListRequest.query:type_name -> zoekt.webserver.v1.Q 8, // 10: zoekt.webserver.v1.ListRequest.opts:type_name -> zoekt.webserver.v1.ListOptions 1, // 11: zoekt.webserver.v1.ListOptions.field:type_name -> zoekt.webserver.v1.ListOptions.RepoListField 10, // 12: zoekt.webserver.v1.ListResponse.repos:type_name -> zoekt.webserver.v1.RepoListEntry @@ -2831,37 +2850,38 @@ var file_zoekt_webserver_v1_webserver_proto_depIdxs = []int32{ 14, // 18: zoekt.webserver.v1.Repository.branches:type_name -> zoekt.webserver.v1.RepositoryBranch 26, // 19: zoekt.webserver.v1.Repository.sub_repo_map:type_name -> zoekt.webserver.v1.Repository.SubRepoMapEntry 27, // 20: zoekt.webserver.v1.Repository.raw_config:type_name -> zoekt.webserver.v1.Repository.RawConfigEntry - 31, // 21: zoekt.webserver.v1.Repository.latest_commit_date:type_name -> google.protobuf.Timestamp - 31, // 22: zoekt.webserver.v1.IndexMetadata.index_time:type_name -> google.protobuf.Timestamp - 28, // 23: zoekt.webserver.v1.IndexMetadata.language_map:type_name -> zoekt.webserver.v1.IndexMetadata.LanguageMapEntry - 14, // 24: zoekt.webserver.v1.MinimalRepoListEntry.branches:type_name -> zoekt.webserver.v1.RepositoryBranch - 30, // 25: zoekt.webserver.v1.Stats.duration:type_name -> google.protobuf.Duration - 30, // 26: zoekt.webserver.v1.Stats.wait:type_name -> google.protobuf.Duration - 30, // 27: zoekt.webserver.v1.Stats.match_tree_construction:type_name -> google.protobuf.Duration - 30, // 28: zoekt.webserver.v1.Stats.match_tree_search:type_name -> google.protobuf.Duration - 0, // 29: zoekt.webserver.v1.Stats.flush_reason:type_name -> zoekt.webserver.v1.FlushReason - 19, // 30: zoekt.webserver.v1.FileMatch.line_matches:type_name -> zoekt.webserver.v1.LineMatch - 22, // 31: zoekt.webserver.v1.FileMatch.chunk_matches:type_name -> zoekt.webserver.v1.ChunkMatch - 20, // 
32: zoekt.webserver.v1.LineMatch.line_fragments:type_name -> zoekt.webserver.v1.LineFragmentMatch - 21, // 33: zoekt.webserver.v1.LineFragmentMatch.symbol_info:type_name -> zoekt.webserver.v1.SymbolInfo - 24, // 34: zoekt.webserver.v1.ChunkMatch.content_start:type_name -> zoekt.webserver.v1.Location - 23, // 35: zoekt.webserver.v1.ChunkMatch.ranges:type_name -> zoekt.webserver.v1.Range - 21, // 36: zoekt.webserver.v1.ChunkMatch.symbol_info:type_name -> zoekt.webserver.v1.SymbolInfo - 24, // 37: zoekt.webserver.v1.Range.start:type_name -> zoekt.webserver.v1.Location - 24, // 38: zoekt.webserver.v1.Range.end:type_name -> zoekt.webserver.v1.Location - 13, // 39: zoekt.webserver.v1.ListResponse.ReposMapEntry.value:type_name -> zoekt.webserver.v1.MinimalRepoListEntry - 11, // 40: zoekt.webserver.v1.Repository.SubRepoMapEntry.value:type_name -> zoekt.webserver.v1.Repository - 2, // 41: zoekt.webserver.v1.WebserverService.Search:input_type -> zoekt.webserver.v1.SearchRequest - 4, // 42: zoekt.webserver.v1.WebserverService.StreamSearch:input_type -> zoekt.webserver.v1.StreamSearchRequest - 7, // 43: zoekt.webserver.v1.WebserverService.List:input_type -> zoekt.webserver.v1.ListRequest - 3, // 44: zoekt.webserver.v1.WebserverService.Search:output_type -> zoekt.webserver.v1.SearchResponse - 5, // 45: zoekt.webserver.v1.WebserverService.StreamSearch:output_type -> zoekt.webserver.v1.StreamSearchResponse - 9, // 46: zoekt.webserver.v1.WebserverService.List:output_type -> zoekt.webserver.v1.ListResponse - 44, // [44:47] is the sub-list for method output_type - 41, // [41:44] is the sub-list for method input_type - 41, // [41:41] is the sub-list for extension type_name - 41, // [41:41] is the sub-list for extension extendee - 0, // [0:41] is the sub-list for field type_name + 32, // 21: zoekt.webserver.v1.Repository.latest_commit_date:type_name -> google.protobuf.Timestamp + 28, // 22: zoekt.webserver.v1.Repository.metadata:type_name -> zoekt.webserver.v1.Repository.MetadataEntry 
+ 32, // 23: zoekt.webserver.v1.IndexMetadata.index_time:type_name -> google.protobuf.Timestamp + 29, // 24: zoekt.webserver.v1.IndexMetadata.language_map:type_name -> zoekt.webserver.v1.IndexMetadata.LanguageMapEntry + 14, // 25: zoekt.webserver.v1.MinimalRepoListEntry.branches:type_name -> zoekt.webserver.v1.RepositoryBranch + 31, // 26: zoekt.webserver.v1.Stats.duration:type_name -> google.protobuf.Duration + 31, // 27: zoekt.webserver.v1.Stats.wait:type_name -> google.protobuf.Duration + 31, // 28: zoekt.webserver.v1.Stats.match_tree_construction:type_name -> google.protobuf.Duration + 31, // 29: zoekt.webserver.v1.Stats.match_tree_search:type_name -> google.protobuf.Duration + 0, // 30: zoekt.webserver.v1.Stats.flush_reason:type_name -> zoekt.webserver.v1.FlushReason + 19, // 31: zoekt.webserver.v1.FileMatch.line_matches:type_name -> zoekt.webserver.v1.LineMatch + 22, // 32: zoekt.webserver.v1.FileMatch.chunk_matches:type_name -> zoekt.webserver.v1.ChunkMatch + 20, // 33: zoekt.webserver.v1.LineMatch.line_fragments:type_name -> zoekt.webserver.v1.LineFragmentMatch + 21, // 34: zoekt.webserver.v1.LineFragmentMatch.symbol_info:type_name -> zoekt.webserver.v1.SymbolInfo + 24, // 35: zoekt.webserver.v1.ChunkMatch.content_start:type_name -> zoekt.webserver.v1.Location + 23, // 36: zoekt.webserver.v1.ChunkMatch.ranges:type_name -> zoekt.webserver.v1.Range + 21, // 37: zoekt.webserver.v1.ChunkMatch.symbol_info:type_name -> zoekt.webserver.v1.SymbolInfo + 24, // 38: zoekt.webserver.v1.Range.start:type_name -> zoekt.webserver.v1.Location + 24, // 39: zoekt.webserver.v1.Range.end:type_name -> zoekt.webserver.v1.Location + 13, // 40: zoekt.webserver.v1.ListResponse.ReposMapEntry.value:type_name -> zoekt.webserver.v1.MinimalRepoListEntry + 11, // 41: zoekt.webserver.v1.Repository.SubRepoMapEntry.value:type_name -> zoekt.webserver.v1.Repository + 2, // 42: zoekt.webserver.v1.WebserverService.Search:input_type -> zoekt.webserver.v1.SearchRequest + 4, // 43: 
zoekt.webserver.v1.WebserverService.StreamSearch:input_type -> zoekt.webserver.v1.StreamSearchRequest + 7, // 44: zoekt.webserver.v1.WebserverService.List:input_type -> zoekt.webserver.v1.ListRequest + 3, // 45: zoekt.webserver.v1.WebserverService.Search:output_type -> zoekt.webserver.v1.SearchResponse + 5, // 46: zoekt.webserver.v1.WebserverService.StreamSearch:output_type -> zoekt.webserver.v1.StreamSearchResponse + 9, // 47: zoekt.webserver.v1.WebserverService.List:output_type -> zoekt.webserver.v1.ListResponse + 45, // [45:48] is the sub-list for method output_type + 42, // [42:45] is the sub-list for method input_type + 42, // [42:42] is the sub-list for extension type_name + 42, // [42:42] is the sub-list for extension extendee + 0, // [0:42] is the sub-list for field type_name } func init() { file_zoekt_webserver_v1_webserver_proto_init() } @@ -3155,7 +3175,7 @@ func file_zoekt_webserver_v1_webserver_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_zoekt_webserver_v1_webserver_proto_rawDesc, NumEnums: 2, - NumMessages: 27, + NumMessages: 28, NumExtensions: 0, NumServices: 1, }, diff --git a/grpc/protos/zoekt/webserver/v1/webserver.proto b/grpc/protos/zoekt/webserver/v1/webserver.proto index 4a1db249f..8fb4fca71 100644 --- a/grpc/protos/zoekt/webserver/v1/webserver.proto +++ b/grpc/protos/zoekt/webserver/v1/webserver.proto @@ -216,6 +216,9 @@ message Repository { // tenant_id is the tenant ID of the repository. int64 tenant_id = 18; + + // Additional metadata about the repository. + map metadata = 19; } message IndexMetadata { diff --git a/ignore/ignore.go b/ignore/ignore.go index bc0506a57..fdb141eb5 100644 --- a/ignore/ignore.go +++ b/ignore/ignore.go @@ -1,4 +1,16 @@ -// package ignore provides helpers to support ignore-files similar to .gitignore +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package ignore provides helpers to support ignore-files similar to .gitignore package ignore import ( diff --git a/bits.go b/index/bits.go similarity index 99% rename from bits.go rename to index/bits.go index d438cbf15..e9270c165 100644 --- a/bits.go +++ b/index/bits.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "cmp" @@ -29,7 +29,7 @@ func generateCaseNgrams(g ngram) []ngram { variants := make([]ngram, 0, 8) cur := asRunes for { - for i := 0; i < 3; i++ { + for i := range 3 { next := unicode.SimpleFold(cur[i]) cur[i] = next if next != asRunes[i] { diff --git a/bits_test.go b/index/bits_test.go similarity index 97% rename from bits_test.go rename to index/bits_test.go index 0dba68283..fc759dc4a 100644 --- a/bits_test.go +++ b/index/bits_test.go @@ -12,14 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package zoekt +package index import ( "encoding/binary" "log" + "math" "math/rand" "reflect" - "sort" + "slices" "strconv" "testing" "testing/quick" @@ -98,7 +99,7 @@ func TestNextFileIndex(t *testing.T) { ends []uint32 want uint32 }{ - {maxUInt32, 0, []uint32{34}, 1}, + {math.MaxUint32, 0, []uint32{34}, 1}, {9, 0, []uint32{34}, 0}, {450, 0, []uint32{100, 200, 300, 400, 500, 600}, 4}, } { @@ -149,7 +150,7 @@ func TestCompressedPostingIterator(t *testing.T) { var nums []uint32 i := newCompressedPostingIterator(data, stringToNGram("abc")) - for i.first() != maxUInt32 { + for i.first() != math.MaxUint32 { nums = append(nums, i.first()) i.next(i.first()) } @@ -197,7 +198,7 @@ func sortedUnique(nums []uint32) []uint32 { if len(nums) == 0 { return nums } - sort.Slice(nums, func(i, j int) bool { return nums[i] < nums[j] }) + slices.Sort(nums) filtered := nums[:1] for _, n := range nums[1:] { if filtered[len(filtered)-1] != n { @@ -295,7 +296,7 @@ func BenchmarkUnmarshalDocSections(b *testing.B) { for size := 10; size <= 10000; size *= 10 { ds := make([]DocumentSection, 0, size) var last uint32 - for i := 0; i < size; i++ { + for range size { var d DocumentSection last += 1 + uint32(rng.Int31n(200)) d.Start = last diff --git a/btree.go b/index/btree.go similarity index 98% rename from btree.go rename to index/btree.go index f70d0a638..e2c98f592 100644 --- a/btree.go +++ b/index/btree.go @@ -28,7 +28,7 @@ // Corpora, Proceedings of the ACL-HLT 2011 System Demonstrations, pages // 103-108 -package zoekt +package index import ( "encoding/binary" @@ -46,6 +46,11 @@ import ( // On linux "getconf PAGESIZE" returns the number of bytes in a memory page. 
const btreeBucketSize = (4096 * 2) / ngramEncoding +const ( + interfaceBytes uint64 = 16 + pointerSize uint64 = 8 +) + type btree struct { root node opts btreeOpts @@ -422,7 +427,7 @@ func (b btreeIndex) DumpMap() map[ngram]simpleSection { bucket, _ := b.file.Read(off, sz) // decode all ngrams in the bucket and fill map - for i := 0; i < len(bucket)/ngramEncoding; i++ { + for i := range len(bucket) / ngramEncoding { gram := ngram(binary.BigEndian.Uint64(bucket[i*8:])) m[gram] = b.getPostingList(int(n.postingIndexOffset) + i) } diff --git a/btree_test.go b/index/btree_test.go similarity index 98% rename from btree_test.go rename to index/btree_test.go index 019708441..67e9c58e3 100644 --- a/btree_test.go +++ b/index/btree_test.go @@ -1,4 +1,4 @@ -package zoekt +package index import ( "fmt" @@ -136,7 +136,7 @@ func TestGetBucket(t *testing.T) { bucketSize: bucketSize, v: 2, }) - for i := 0; i < tt.nNgrams; i++ { + for i := range tt.nNgrams { bt.insert(ngram(i + 1)) } bt.freeze() diff --git a/build/builder.go b/index/builder.go similarity index 85% rename from build/builder.go rename to index/builder.go index 498318699..41e89682a 100644 --- a/build/builder.go +++ b/index/builder.go @@ -12,12 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -// package build implements a more convenient interface for building -// zoekt indices. -package build +// Package index contains logic for building Zoekt indexes. NOTE: this package is not considered +// part of the public API, and it is not recommended to rely on it in external code. 
+package index import ( - "cmp" "crypto/sha1" "flag" "fmt" @@ -38,11 +37,14 @@ import ( "github.com/bmatcuk/doublestar" "github.com/dustin/go-humanize" - "github.com/go-enry/go-enry/v2" "github.com/rs/xid" + "golang.org/x/sys/unix" + + "maps" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/ctags" + "github.com/sourcegraph/zoekt/internal/ctags" + "github.com/sourcegraph/zoekt/internal/tenant" ) var DefaultDir = filepath.Join(os.Getenv("HOME"), ".zoekt") @@ -58,6 +60,9 @@ type Options struct { // IndexDir is a directory that holds *.zoekt index files. IndexDir string + // ShardPrefixOverride sets the prefix for shards name + ShardPrefixOverride string + // SizeMax is the maximum file size SizeMax int @@ -117,9 +122,6 @@ type Options struct { // // Note: heap checking is "best effort", and it's possible for the process to OOM without triggering the heap profile. HeapProfileTriggerBytes uint64 - - // ShardPrefix is the prefix of the shard. It defaults to the repository name. - ShardPrefix string } // HashOptions contains only the options in Options that upon modification leads to IndexState of IndexStateMismatch during the next index building. 
@@ -146,10 +148,10 @@ func (o *Options) GetHash() string { hasher := sha1.New() hasher.Write([]byte(h.ctagsPath)) - hasher.Write([]byte(fmt.Sprintf("%t", h.cTagsMustSucceed))) - hasher.Write([]byte(fmt.Sprintf("%d", h.sizeMax))) - hasher.Write([]byte(fmt.Sprintf("%q", h.largeFiles))) - hasher.Write([]byte(fmt.Sprintf("%t", h.disableCTags))) + hasher.Write(fmt.Appendf(nil, "%t", h.cTagsMustSucceed)) + hasher.Write(fmt.Appendf(nil, "%d", h.sizeMax)) + hasher.Write(fmt.Appendf(nil, "%q", h.largeFiles)) + hasher.Write(fmt.Appendf(nil, "%t", h.disableCTags)) return fmt.Sprintf("%x", hasher.Sum(nil)) } @@ -182,9 +184,9 @@ func (o *Options) Flags(fs *flag.FlagSet) { fs.IntVar(&o.ShardMax, "shard_limit", x.ShardMax, "maximum corpus size for a shard") fs.IntVar(&o.Parallelism, "parallelism", x.Parallelism, "maximum number of parallel indexing processes.") fs.StringVar(&o.IndexDir, "index", x.IndexDir, "directory for search indices") + fs.StringVar(&o.ShardPrefixOverride, "shard_prefix_override", x.ShardPrefixOverride, "prefix for shard name") fs.BoolVar(&o.CTagsMustSucceed, "require_ctags", x.CTagsMustSucceed, "If set, ctags calls must succeed.") fs.Var(largeFilesFlag{o}, "large_file", "A glob pattern where matching files are to be index regardless of their size. You can add multiple patterns by setting this more than once.") - fs.StringVar(&o.ShardPrefix, "shard_prefix", x.ShardPrefix, "the prefix of the shard. 
Defaults to repository name") // Sourcegraph specific fs.BoolVar(&o.DisableCTags, "disable_ctags", x.DisableCTags, "If set, ctags will not be called.") @@ -215,6 +217,10 @@ func (o *Options) Args() []string { args = append(args, "-index", o.IndexDir) } + if o.ShardPrefixOverride != "" { + args = append(args, "-shard_prefix_override", o.ShardPrefixOverride) + } + if o.CTagsMustSucceed { args = append(args, "-require_ctags") } @@ -232,10 +238,6 @@ func (o *Options) Args() []string { args = append(args, "-shard_merging") } - if o.ShardPrefix != "" { - args = append(args, "-shard_prefix", o.ShardPrefix) - } - return args } @@ -247,8 +249,8 @@ type Builder struct { throttle chan int nextShardNum int - todo []*zoekt.Document - docChecker zoekt.DocChecker + todo []*Document + docChecker DocChecker size int parserBins ctags.ParserBinMap @@ -272,6 +274,11 @@ type Builder struct { id string finishCalled bool + + // postingsPool reuses postingsBuilder instances across shard builds, + // retaining their map and slice allocations to avoid repeated + // memclr/madvise overhead. + postingsPool sync.Pool } type finishedShard struct { @@ -335,11 +342,23 @@ func (o *Options) SetDefaults() { // ShardName returns the name the given index shard. func (o *Options) shardName(n int) string { - return o.shardNameVersion(zoekt.IndexFormatVersion, n) + return o.shardNameVersion(IndexFormatVersion, n) } func (o *Options) shardNameVersion(version, n int) string { - return zoekt.ShardName(o.IndexDir, cmp.Or(o.ShardPrefix, o.RepositoryDescription.Name), version, n) + prefix := o.ShardPrefixOverride // ShardPrefixOverride takes precedence to support custom shard naming strategies + + if prefix == "" { + // Sourcegraph specific: We use IDs in shard names on multi-tenant + // instances to prevent conflicts. 
+ if tenant.UseIDBasedShardNames() { + prefix = fmt.Sprintf("%09d_%09d", o.RepositoryDescription.TenantID, o.RepositoryDescription.ID) + } else { + prefix = o.RepositoryDescription.Name + } + } + + return shardName(o.IndexDir, prefix, version, n) } type IndexState string @@ -358,11 +377,11 @@ var readVersions = []struct { IndexFormatVersion int FeatureVersion int }{{ - IndexFormatVersion: zoekt.IndexFormatVersion, - FeatureVersion: zoekt.FeatureVersion, + IndexFormatVersion: IndexFormatVersion, + FeatureVersion: FeatureVersion, }, { - IndexFormatVersion: zoekt.NextIndexFormatVersion, - FeatureVersion: zoekt.FeatureVersion, + IndexFormatVersion: NextIndexFormatVersion, + FeatureVersion: FeatureVersion, }} // IncrementalSkipIndexing returns true if the index present on disk matches @@ -381,7 +400,7 @@ func (o *Options) IndexState() (IndexState, string) { return IndexStateMissing, fn } - repos, index, err := zoekt.ReadMetadataPathAlive(fn) + repos, index, err := ReadMetadataPathAlive(fn) if os.IsNotExist(err) { return IndexStateMissing, fn } else if err != nil { @@ -436,7 +455,7 @@ func (o *Options) FindRepositoryMetadata() (repository *zoekt.Repository, metada return nil, nil, false, nil } - repositories, metadata, err := zoekt.ReadMetadataPathAlive(shard) + repositories, metadata, err := ReadMetadataPathAlive(shard) if err != nil { return nil, nil, false, fmt.Errorf("reading metadata for shard %q: %w", shard, err) } @@ -466,24 +485,18 @@ func (o *Options) findShard() string { } // Brute force finding the shard in compound shards. We should only hit this - // code path for repositories that are not already existing or are in - // compound shards. - // - // TODO add an oracle which can speed this up in the case of repositories - // already in compound shards. + // code path for repositories that don't exist yet or are in compound shards. 
+ return o.findCompoundShard() +} + +func (o *Options) findCompoundShard() string { compoundShards, err := filepath.Glob(path.Join(o.IndexDir, "compound-*.zoekt")) if err != nil { return "" } for _, fn := range compoundShards { - repos, _, err := zoekt.ReadMetadataPathAlive(fn) - if err != nil { - continue - } - for _, repo := range repos { - if repo.ID == o.RepositoryDescription.ID { - return fn - } + if containsRepo(fn, o.RepositoryDescription.ID) { + return fn } } @@ -592,10 +605,10 @@ func NewBuilder(opts Options) (*Builder, error) { // AddFile is a convenience wrapper for the Add method func (b *Builder) AddFile(name string, content []byte) error { - return b.Add(zoekt.Document{Name: name, Content: content}) + return b.Add(Document{Name: name, Content: content}) } -func (b *Builder) Add(doc zoekt.Document) error { +func (b *Builder) Add(doc Document) error { if b.finishCalled { return nil } @@ -606,18 +619,22 @@ func (b *Builder) Add(doc zoekt.Document) error { // we pass through a part of the source tree with binary/large // files, the corresponding shard would be mostly empty, so // insert a reason here too. - doc.SkipReason = fmt.Sprintf("document size %d larger than limit %d", len(doc.Content), b.opts.SizeMax) - } else if err := b.docChecker.Check(doc.Content, b.opts.TrigramMax, allowLargeFile); err != nil { - doc.SkipReason = err.Error() - doc.Language = "binary" + doc.SkipReason = SkipReasonTooLarge + } else if skip := b.docChecker.Check(doc.Content, b.opts.TrigramMax, allowLargeFile); skip != SkipReasonNone { + doc.SkipReason = skip } + // Pre-compute file category and language while content is still + // available, before content is dropped for skipped documents. 
+ DetermineFileCategory(&doc) + DetermineLanguageIfUnknown(&doc) + b.todo = append(b.todo, &doc) - if doc.SkipReason == "" { + if doc.SkipReason == SkipReasonNone { b.size += len(doc.Name) + len(doc.Content) } else { - b.size += len(doc.Name) + len(doc.SkipReason) + b.size += len(doc.Name) // Drop the content if we are skipping the document. Skipped content is not counted towards the // shard size limit, so otherwise we might buffer too much data in memory before flushing. doc.Content = nil @@ -664,9 +681,7 @@ func (b *Builder) Finish() error { // map of temporary -> final names for all updated shards + shard metadata files artifactPaths := make(map[string]string) - for tmp, final := range b.finishedShards { - artifactPaths[tmp] = final - } + maps.Copy(artifactPaths, b.finishedShards) oldShards := b.opts.FindAllShards() @@ -674,7 +689,7 @@ func (b *Builder) Finish() error { // Delta shard builds need to update FileTombstone and branch commit information for all // existing shards for _, shard := range oldShards { - repositories, _, err := zoekt.ReadMetadataPathAlive(shard) + repositories, _, err := ReadMetadataPathAlive(shard) if err != nil { return fmt.Errorf("reading metadata from shard %q: %w", shard, err) } @@ -719,7 +734,9 @@ func (b *Builder) Finish() error { repository.LatestCommitDate = b.opts.RepositoryDescription.LatestCommitDate - tempPath, finalPath, err := zoekt.JsonMarshalRepoMetaTemp(shard, repository) + repository.Metadata = b.opts.RepositoryDescription.Metadata + + tempPath, finalPath, err := JsonMarshalRepoMetaTemp(shard, repository) if err != nil { return fmt.Errorf("writing repository metadta for shard %q: %w", shard, err) } @@ -747,7 +764,7 @@ func (b *Builder) Finish() error { toDelete = make(map[string]struct{}) for _, name := range oldShards { - paths, err := zoekt.IndexFilePaths(name) + paths, err := IndexFilePaths(name) if err != nil { b.buildError = fmt.Errorf("failed to find old paths for %s: %w", name, err) } @@ -774,7 +791,7 @@ func (b 
*Builder) Finish() error { if !strings.HasSuffix(p, ".zoekt") { continue } - err := zoekt.SetTombstone(p, b.opts.RepositoryDescription.ID) + err := SetTombstone(p, b.opts.RepositoryDescription.ID) b.buildError = err continue } @@ -860,18 +877,8 @@ func squashRange(j int) float64 { return x / (1 + x) } -// IsLowPriority takes a file name and makes an educated guess about its priority -// in search results. A file is considered low priority if it looks like a test, -// vendored, or generated file. -// -// These 'priority' criteria affects how documents are ordered within a shard. It's -// also used to help guess a file's rank when we're missing ranking information. -func IsLowPriority(path string, content []byte) bool { - return enry.IsTest(path) || enry.IsVendor(path) || enry.IsGenerated(path, content) -} - type rankedDoc struct { - *zoekt.Document + *Document rank []float64 } @@ -879,24 +886,25 @@ type rankedDoc struct { // before writing them to disk. The order of documents in the shard is important // at query time, because earlier documents receive a boost at query time and // have a higher chance of being searched before limits kick in. -func rank(d *zoekt.Document, origIdx int) []float64 { +func rank(d *Document, origIdx int) []float64 { skipped := 0.0 - if d.SkipReason != "" { + if d.SkipReason != SkipReasonNone { skipped = 1.0 } + // Use pre-computed Category from DetermineFileCategory. 
generated := 0.0 - if enry.IsGenerated(d.Name, d.Content) { + if d.Category == FileCategoryGenerated { generated = 1.0 } vendor := 0.0 - if enry.IsVendor(d.Name) { + if d.Category == FileCategoryVendored { vendor = 1.0 } test := 0.0 - if enry.IsTest(d.Name) { + if d.Category == FileCategoryTest { test = 1.0 } @@ -931,7 +939,7 @@ func rank(d *zoekt.Document, origIdx int) []float64 { } } -func sortDocuments(todo []*zoekt.Document) { +func sortDocuments(todo []*Document) { rs := make([]rankedDoc, 0, len(todo)) for i, t := range todo { rd := rankedDoc{t, rank(t, i)} @@ -956,7 +964,7 @@ func sortDocuments(todo []*zoekt.Document) { } } -func (b *Builder) buildShard(todo []*zoekt.Document, nextShardNum int) (*finishedShard, error) { +func (b *Builder) buildShard(todo []*Document, nextShardNum int) (*finishedShard, error) { if !b.opts.DisableCTags && (b.opts.CTagsPath != "" || b.opts.ScipCTagsPath != "") { err := parseSymbols(todo, b.opts.LanguageMap, b.parserBins) if b.opts.CTagsMustSucceed && err != nil { @@ -986,7 +994,9 @@ func (b *Builder) buildShard(todo []*zoekt.Document, nextShardNum int) (*finishe } } - return b.writeShard(name, shardBuilder) + result, err := b.writeShard(name, shardBuilder) + b.returnPostingsBuilders(shardBuilder) + return result, err } // CheckMemoryUsage checks the memory usage of the process and writes a memory profile if the heap usage exceeds the @@ -1020,14 +1030,37 @@ func (b *Builder) CheckMemoryUsage() { } } -func (b *Builder) newShardBuilder() (*zoekt.IndexBuilder, error) { +func (b *Builder) getPostingsBuilder() *postingsBuilder { + if pb, ok := b.postingsPool.Get().(*postingsBuilder); ok { + pb.reset() + return pb + } + return newPostingsBuilder(b.opts.ShardMax) +} + +// returnPostingsBuilders returns both postings builders from sb to the +// pool and nils the fields so any subsequent misuse crashes obviously. 
+func (b *Builder) returnPostingsBuilders(sb *ShardBuilder) { + if sb.contentPostings != nil { + b.postingsPool.Put(sb.contentPostings) + sb.contentPostings = nil + } + if sb.namePostings != nil { + b.postingsPool.Put(sb.namePostings) + sb.namePostings = nil + } +} + +func (b *Builder) newShardBuilder() (*ShardBuilder, error) { desc := b.opts.RepositoryDescription desc.HasSymbols = !b.opts.DisableCTags && b.opts.CTagsPath != "" desc.SubRepoMap = b.opts.SubRepositories desc.IndexOptions = b.opts.GetHash() - shardBuilder, err := zoekt.NewIndexBuilder(&desc) - if err != nil { + content := b.getPostingsBuilder() + name := b.getPostingsBuilder() + shardBuilder := newShardBuilderWithPostings(content, name) + if err := shardBuilder.setRepository(&desc); err != nil { return nil, err } shardBuilder.IndexTime = b.indexTime @@ -1035,7 +1068,7 @@ func (b *Builder) newShardBuilder() (*zoekt.IndexBuilder, error) { return shardBuilder, nil } -func (b *Builder) writeShard(fn string, ib *zoekt.IndexBuilder) (*finishedShard, error) { +func (b *Builder) writeShard(fn string, ib *ShardBuilder) (*finishedShard, error) { dir := filepath.Dir(fn) if err := os.MkdirAll(dir, 0o700); err != nil { return nil, err @@ -1092,3 +1125,8 @@ func (e *deltaIndexOptionsMismatchError) Error() string { // umask holds the Umask of the current process var umask os.FileMode + +func init() { + umask = os.FileMode(unix.Umask(0)) + unix.Umask(int(umask)) +} diff --git a/build/builder_test.go b/index/builder_test.go similarity index 78% rename from build/builder_test.go rename to index/builder_test.go index aec77a4e3..f4eb5d0c8 100644 --- a/build/builder_test.go +++ b/index/builder_test.go @@ -1,12 +1,14 @@ -package build +package index import ( "errors" "flag" + "fmt" "io" "log" "os" "path/filepath" + "reflect" "strconv" "strings" "testing" @@ -14,6 +16,8 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + "github.com/prometheus/client_golang/prometheus/testutil" + 
"github.com/stretchr/testify/require" "github.com/sourcegraph/zoekt" ) @@ -27,8 +31,9 @@ func TestBuildv16(t *testing.T) { opts := Options{ IndexDir: dir, RepositoryDescription: zoekt.Repository{ - Name: "repo", - Source: "./testdata/repo/", + Name: "repo", + Source: "./testdata/repo/", + Metadata: map[string]string{"foo": "bar"}, }, DisableCTags: true, } @@ -53,7 +58,7 @@ func TestBuildv16(t *testing.T) { // fields indexTime and id depend on time. For this test, we copy the fields from // the old shard. - _, wantMetadata, err := zoekt.ReadMetadataPath(wantP) + _, wantMetadata, err := ReadMetadataPath(wantP) if err != nil { t.Fatal(err) } @@ -235,14 +240,14 @@ func TestDontCountContentOfSkippedFiles(t *testing.T) { // content with at least 100 bytes binary := append([]byte("abc def \x00"), make([]byte, 100)...) - err = b.Add(zoekt.Document{ + err = b.Add(Document{ Name: "f1", Content: binary, }) if err != nil { t.Fatal(err) } - if len(b.todo) != 1 || b.todo[0].SkipReason == "" { + if len(b.todo) != 1 || b.todo[0].SkipReason == SkipReasonNone { t.Fatalf("document should have been skipped") } if b.todo[0].Content != nil { @@ -253,6 +258,114 @@ func TestDontCountContentOfSkippedFiles(t *testing.T) { } } +func TestPartialSuccess(t *testing.T) { + dir := t.TempDir() + + opts := Options{ + IndexDir: dir, + ShardMax: 1024, + SizeMax: 1 << 20, + Parallelism: 1, + } + opts.RepositoryDescription.Name = "repo" + opts.SetDefaults() + + b, err := NewBuilder(opts) + if err != nil { + t.Fatalf("NewBuilder: %v", err) + } + + for i := range 4 { + nm := fmt.Sprintf("F%d", i) + _ = b.AddFile(nm, []byte(strings.Repeat("01234567\n", 128))) + } + b.buildError = fmt.Errorf("any error") + + // No error checking. + _ = b.Finish() + + // Finish cleans up temporary files. 
+ if fs, err := filepath.Glob(dir + "/*"); err != nil { + t.Errorf("glob(%s): %v", dir, err) + } else if len(fs) != 0 { + t.Errorf("got shards %v, want []", fs) + } +} + +// Tests that we skip looping over repos in compound shards when we know that +// the repository we are looking for is not in the shard. +func TestSkipCompoundShards(t *testing.T) { + metricCompoundShardLookups.Reset() + + compoundShards := [][]zoekt.Repository{ + { + {Name: "repoA", ID: 1}, + {Name: "repoB", ID: 2}, + {Name: "repoC", ID: 3}, + }, + { + {Name: "repoD", ID: 4}, + {Name: "repoE", ID: 5}, + {Name: "repoF", ID: 6}, + {Name: "repoF", ID: 7}, + {Name: "repoF", ID: 8}, + }, + } + var lookForRepoID uint32 = 99 + wantSkippedCount := 2 + + indexDir := t.TempDir() + for _, repositoryGroup := range compoundShards { + createTestCompoundShard(t, indexDir, repositoryGroup) + } + o := &Options{ + IndexDir: indexDir, + RepositoryDescription: zoekt.Repository{ID: lookForRepoID}, + } + + shard := o.findCompoundShard() + require.Empty(t, shard) + + // Check if the "skipped" counter was incremented + skippedCount := int(testutil.ToFloat64(metricCompoundShardLookups.WithLabelValues("skipped"))) + require.Equal(t, wantSkippedCount, skippedCount) +} + +// With optimization +// BenchmarkFindCompoundShard-16 33505 36016 ns/op +// +// Without optimization +// BenchmarkFindCompoundShard-16 76 15568589 ns/op +func BenchmarkFindCompoundShard(b *testing.B) { + // Generate a large compound shard + const numRepos = 5000 + repositories := make([]zoekt.Repository, numRepos) + for i := range numRepos { + repositories[i] = zoekt.Repository{ + Name: fmt.Sprintf("repo%d", i+1), + ID: uint32(i + 1), + } + } + indexDir := b.TempDir() + createTestCompoundShard(b, indexDir, repositories) + + // pick id that is not in the shard + var searchRepoID uint32 = numRepos + 1 + + b.ResetTimer() + for i := 0; i < b.N; i++ { + o := &Options{ + IndexDir: indexDir, + RepositoryDescription: zoekt.Repository{ID: searchRepoID}, + } + + 
shard := o.findCompoundShard() + if shard != "" { + b.Fatal("expected empty result") + } + } +} + func TestOptions_FindAllShards(t *testing.T) { type simpleShard struct { Repository zoekt.Repository @@ -326,17 +439,17 @@ func TestOptions_FindAllShards(t *testing.T) { compoundShards: [][]zoekt.Repository{ { {Name: "repoA", ID: 1}, - {Name: "sameName", ID: 2}, - {Name: "sameName", ID: 3}, + {Name: "repoB", ID: 2}, + {Name: "repoC", ID: 3}, }, { - {Name: "repoB", ID: 4}, - {Name: "sameName", ID: 5}, - {Name: "sameName", ID: 6}, + {Name: "repoD", ID: 4}, + {Name: "repoE", ID: 5}, + {Name: "repoF", ID: 6}, }, }, expectedShardCount: 1, - expectedRepository: zoekt.Repository{Name: "sameName", ID: 5}, + expectedRepository: zoekt.Repository{Name: "something-else", ID: 5}, }, } for _, tt := range tests { @@ -371,7 +484,7 @@ func TestOptions_FindAllShards(t *testing.T) { if tt.expectedShardCount > 0 { for _, s := range shards { // all shards should contain the metadata for the desired repository - repos, _, err := zoekt.ReadMetadataPathAlive(s) + repos, _, err := ReadMetadataPathAlive(s) if err != nil { t.Fatalf("reading metadata from shard %q: %s", s, err) } @@ -620,7 +733,7 @@ func TestBuilder_DeltaShardsMetadataInOlderShards(t *testing.T) { } for _, s := range shards { - repositories, _, err := zoekt.ReadMetadataPathAlive(s) + repositories, _, err := ReadMetadataPathAlive(s) if err != nil { t.Fatalf("reading repository metadata from shard %q", s) } @@ -761,51 +874,7 @@ func TestFindRepositoryMetadata(t *testing.T) { } } -func TestIsLowPriority(t *testing.T) { - cases := []string{ - "builder_test.go", - "test/TestQuery.java", - "search/vendor/thirdparty.cc", - "search/node_modules/search/index.js", - "search.min.js", - "internal/search.js.map", - } - - for _, tt := range cases { - t.Run(tt, func(t *testing.T) { - if !IsLowPriority(tt, nil) { - t.Errorf("expected file '%s' to be low priority", tt) - } - }) - } - - negativeCases := []string{ - "builder.go", - 
"RoutesTrigger.java", - "search.js", - } - - for _, tt := range negativeCases { - t.Run(tt, func(t *testing.T) { - if IsLowPriority(tt, nil) { - t.Errorf("did not expect file '%s' to be low priority", tt) - } - }) - } - - // Explicitly check that content is important by using the same filename but - // different content. - normal := "package mock\n\nvar Mock struct {}" - generated := "// Code generated by mock\npackage mock\n\nvar Mock struct {}" - if IsLowPriority("mock.go", []byte(normal)) { - t.Error("expected non-generated content to not be low priority") - } - if !IsLowPriority("mock.go", []byte(generated)) { - t.Error("expected generated content to be low priority") - } -} - -func createTestShard(t *testing.T, indexDir string, r zoekt.Repository, numShards int, optFns ...func(options *Options)) []string { +func createTestShard(t testing.TB, indexDir string, r zoekt.Repository, numShards int, optFns ...func(options *Options)) []string { t.Helper() if err := os.MkdirAll(filepath.Dir(indexDir), 0o700); err != nil { @@ -833,12 +902,12 @@ func createTestShard(t *testing.T, indexDir string, r zoekt.Repository, numShard numShards = 1 } - for i := 0; i < numShards; i++ { + for i := range numShards { // Create entries (file + contents) that are ~100 bytes each. // This (along with our shardMax setting of 75 bytes) means that each shard // will contain at most one of these. 
fileName := strconv.Itoa(i) - document := zoekt.Document{Name: fileName, Content: []byte(strings.Repeat("A", 100))} + document := Document{Name: fileName, Content: []byte(strings.Repeat("A", 100))} for _, branch := range o.RepositoryDescription.Branches { document.Branches = append(document.Branches, branch.Name) } @@ -856,7 +925,7 @@ func createTestShard(t *testing.T, indexDir string, r zoekt.Repository, numShard return o.FindAllShards() } -func createTestCompoundShard(t *testing.T, indexDir string, repositories []zoekt.Repository, optFns ...func(options *Options)) { +func createTestCompoundShard(t testing.TB, indexDir string, repositories []zoekt.Repository, optFns ...func(options *Options)) { t.Helper() var shardNames []string @@ -879,7 +948,7 @@ func createTestCompoundShard(t *testing.T, indexDir string, repositories []zoekt } // load the normal shards that we created - var files []zoekt.IndexFile + var files []IndexFile for _, shard := range shardNames { f, err := os.Open(shard) if err != nil { @@ -887,7 +956,7 @@ func createTestCompoundShard(t *testing.T, indexDir string, repositories []zoekt } defer f.Close() - indexFile, err := zoekt.NewIndexFile(f) + indexFile, err := NewIndexFile(f) if err != nil { t.Fatalf("creating index file: %s", err) } @@ -897,7 +966,7 @@ func createTestCompoundShard(t *testing.T, indexDir string, repositories []zoekt } // merge all the simple shards into a compound shard - tmpName, dstName, err := zoekt.Merge(indexDir, files...) + tmpName, dstName, err := Merge(indexDir, files...) 
if err != nil { t.Fatalf("merging index files into compound shard: %s", err) } @@ -1000,3 +1069,136 @@ func TestIgnoreSizeMax(t *testing.T) { }) } } + +type filerankCase struct { + name string + docs []*Document + want []int +} + +func testFileRankAspect(t *testing.T, c filerankCase) { + var want []*Document + for _, j := range c.want { + want = append(want, c.docs[j]) + } + + got := make([]*Document, len(c.docs)) + copy(got, c.docs) + for _, d := range got { + DetermineFileCategory(d) + } + sortDocuments(got) + + print := func(ds []*Document) string { + r := "" + for _, d := range ds { + r += fmt.Sprintf("%v, ", d) + } + return r + } + if !reflect.DeepEqual(got, want) { + t.Errorf("got docs [%v], want [%v]", print(got), print(want)) + } +} + +func TestFileRank(t *testing.T) { + for _, c := range []filerankCase{{ + name: "filename", + docs: []*Document{ + { + Name: "longlonglong", + Content: []byte("bla"), + }, + { + Name: "short", + Content: []byte("bla"), + }, + }, + want: []int{1, 0}, + }, { + name: "test", + docs: []*Document{ + { + Name: "foo_test.go", + Content: []byte("bla"), + }, + { + Name: "longlonglong", + Content: []byte("bla"), + }, + }, + want: []int{1, 0}, + }, { + name: "content", + docs: []*Document{ + { + Content: []byte("bla"), + }, + { + Content: []byte("blablablabla"), + }, + { + Content: []byte("blabla"), + }, + }, + want: []int{0, 2, 1}, + }, { + name: "skipped docs", + docs: []*Document{ + { + Name: "binary_file", + SkipReason: SkipReasonBinary, + }, + { + Name: "some_test.go", + Content: []byte("bla"), + }, + { + Name: "large_file.go", + SkipReason: SkipReasonTooLarge, + }, + { + Name: "file.go", + Content: []byte("blabla"), + }, + }, + want: []int{3, 1, 0, 2}, + }} { + t.Run(c.name, func(t *testing.T) { + testFileRankAspect(t, c) + }) + } +} + +func TestOptions_shardName(t *testing.T) { + opts := Options{ + IndexDir: "/data", + RepositoryDescription: zoekt.Repository{ + Name: "a/b", + TenantID: 123, + ID: 456, + }, + } + + 
t.Setenv("WORKSPACES_API_URL", "") + if got, want := opts.shardNameVersion(16, 0), "/data/a%2Fb_v16.00000.zoekt"; got != want { + t.Fatalf("expected shard name to be repo name based:\ngot: %q\nwant: %q", got, want) + } + + t.Setenv("WORKSPACES_API_URL", "http://example.com") + if got, want := opts.shardNameVersion(16, 0), "/data/000000123_000000456_v16.00000.zoekt"; got != want { + t.Fatalf("expected shard name to be ID based:\ngot: %q\nwant: %q", got, want) + } + + // If something goes wrong and TenantID and RepoID is unset, we create a + // name which won't be visible by any tenant. + opts = Options{ + IndexDir: "/data", + RepositoryDescription: zoekt.Repository{ + Name: "a/b", + }, + } + if got, want := opts.shardNameVersion(16, 0), "/data/000000000_000000000_v16.00000.zoekt"; got != want { + t.Fatalf("expected shard name to be with no tenant:\ngot: %q\nwant: %q", got, want) + } +} diff --git a/contentprovider.go b/index/contentprovider.go similarity index 79% rename from contentprovider.go rename to index/contentprovider.go index 34600f303..7bb52a719 100644 --- a/contentprovider.go +++ b/index/contentprovider.go @@ -12,20 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "bytes" - "fmt" "log" "path" "slices" "sort" - "strings" "unicode" "unicode/utf8" - "github.com/sourcegraph/zoekt/ctags" + "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/internal/ctags" ) var _ = log.Println @@ -34,7 +33,7 @@ var _ = log.Println // content with the same code. type contentProvider struct { id *indexData - stats *Stats + stats *zoekt.Stats // mutable err error @@ -145,7 +144,7 @@ func (p *contentProvider) findOffset(filename bool, r uint32) uint32 { // // Note: the byte slices may be backed by mmapped data, so before being // returned by the API it needs to be copied. 
-func (p *contentProvider) fillMatches(ms []*candidateMatch, numContextLines int, language string, debug bool) []LineMatch { +func (p *contentProvider) fillMatches(ms []*candidateMatch, numContextLines int, language string, opts *zoekt.SearchOptions) []zoekt.LineMatch { var filenameMatches []*candidateMatch contentMatches := make([]*candidateMatch, 0, len(ms)) @@ -160,27 +159,27 @@ func (p *contentProvider) fillMatches(ms []*candidateMatch, numContextLines int, // If there are any content matches, we only return these and skip filename matches. if len(contentMatches) > 0 { contentMatches = breakMatchesOnNewlines(contentMatches, p.data(false)) - return p.fillContentMatches(contentMatches, numContextLines, language, debug) + return p.fillContentMatches(contentMatches, numContextLines, language, opts) } - // Otherwise, we return a single line containing the filematch match. - bestMatch, _ := p.candidateMatchScore(filenameMatches, language, debug) - res := LineMatch{ + // Otherwise, we return a single line containing the filename match. + lineScore, _ := p.scoreLine(filenameMatches, language, -1 /* must pass -1 for filenames */, opts) + res := zoekt.LineMatch{ Line: p.id.fileName(p.idx), FileName: true, - Score: bestMatch.score, - DebugScore: bestMatch.debugScore, + Score: lineScore.score, + DebugScore: lineScore.debugScore, } for _, m := range ms { - res.LineFragments = append(res.LineFragments, LineFragmentMatch{ + res.LineFragments = append(res.LineFragments, zoekt.LineFragmentMatch{ LineOffset: int(m.byteOffset), MatchLength: int(m.byteMatchSz), Offset: m.byteOffset, }) } - return []LineMatch{res} + return []zoekt.LineMatch{res} } @@ -192,7 +191,7 @@ func (p *contentProvider) fillMatches(ms []*candidateMatch, numContextLines int, // // Note: the byte slices may be backed by mmapped data, so before being // returned by the API it needs to be copied.
-func (p *contentProvider) fillChunkMatches(ms []*candidateMatch, numContextLines int, language string, debug bool) []ChunkMatch { +func (p *contentProvider) fillChunkMatches(ms []*candidateMatch, numContextLines int, language string, opts *zoekt.SearchOptions) []zoekt.ChunkMatch { var filenameMatches []*candidateMatch contentMatches := make([]*candidateMatch, 0, len(ms)) @@ -206,21 +205,21 @@ func (p *contentProvider) fillChunkMatches(ms []*candidateMatch, numContextLines // If there are any content matches, we only return these and skip filename matches. if len(contentMatches) > 0 { - return p.fillContentChunkMatches(contentMatches, numContextLines, language, debug) + return p.fillContentChunkMatches(contentMatches, numContextLines, language, opts) } - // Otherwise, we return a single chunk representing the filename match. - bestMatch, _ := p.candidateMatchScore(filenameMatches, language, debug) + // Otherwise, we return a single chunk representing the filename match. + lineScore, _ := p.scoreLine(filenameMatches, language, -1 /* must pass -1 for filenames */, opts) fileName := p.id.fileName(p.idx) - ranges := make([]Range, 0, len(ms)) + ranges := make([]zoekt.Range, 0, len(ms)) for _, m := range ms { - ranges = append(ranges, Range{ - Start: Location{ + ranges = append(ranges, zoekt.Range{ + Start: zoekt.Location{ ByteOffset: m.byteOffset, LineNumber: 1, Column: uint32(utf8.RuneCount(fileName[:m.byteOffset]) + 1), }, - End: Location{ + End: zoekt.Location{ ByteOffset: m.byteOffset + m.byteMatchSz, LineNumber: 1, Column: uint32(utf8.RuneCount(fileName[:m.byteOffset+m.byteMatchSz]) + 1), @@ -228,18 +227,18 @@ func (p *contentProvider) fillChunkMatches(ms []*candidateMatch, numContextLines }) } - return []ChunkMatch{{ + return []zoekt.ChunkMatch{{ Content: fileName, - ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, + ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, Ranges: ranges, FileName: true, - Score: bestMatch.score, -
DebugScore: bestMatch.debugScore, + Score: lineScore.score, + DebugScore: lineScore.debugScore, }} } -func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLines int, language string, debug bool) []LineMatch { - var result []LineMatch +func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLines int, language string, opts *zoekt.SearchOptions) []zoekt.LineMatch { + var result []zoekt.LineMatch for len(ms) > 0 { m := ms[0] num := p.newlines().atOffset(m.byteOffset) @@ -273,7 +272,7 @@ func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLin // Due to merging matches, we may have a match that // crosses a line boundary. Prevent confusion by - // taking lines until we pass the last match + // taking lines until we pass the last match for nextLineStart < len(data) && endMatch > uint32(nextLineStart) { next := bytes.IndexByte(data[nextLineStart:], '\n') if next == -1 { @@ -284,7 +283,7 @@ func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLin } } - finalMatch := LineMatch{ + finalMatch := zoekt.LineMatch{ LineStart: lineStart, LineEnd: nextLineStart, LineNumber: num, @@ -296,16 +295,17 @@ func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLin finalMatch.After = p.newlines().getLines(data, num+1, num+1+numContextLines) } - bestMatch, symbolInfo := p.candidateMatchScore(lineCands, language, debug) - finalMatch.Score = bestMatch.score - finalMatch.DebugScore = bestMatch.debugScore + lineScore, symbolInfo := p.scoreLine(lineCands, language, num, opts) + finalMatch.Score = lineScore.score + finalMatch.DebugScore = lineScore.debugScore for i, m := range lineCands { - fragment := LineFragmentMatch{ + fragment := zoekt.LineFragmentMatch{ Offset: m.byteOffset, LineOffset: int(m.byteOffset) - lineStart, MatchLength: int(m.byteMatchSz), } + if i < len(symbolInfo) && symbolInfo[i] != nil { fragment.SymbolInfo = symbolInfo[i] } @@ -317,8 +317,7 @@ func (p
*contentProvider) fillContentMatches(ms []*candidateMatch, numContextLin return result } -func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numContextLines int, language string, debug bool) []ChunkMatch { - newlines := p.newlines() +func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numContextLines int, language string, opts *zoekt.SearchOptions) []zoekt.ChunkMatch { data := p.data(false) // columnHelper prevents O(len(ms) * len(data)) lookups for all columns. @@ -332,24 +331,23 @@ func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numConte sort.Sort((sortByOffsetSlice)(ms)) } + newlines := p.newlines() chunks := chunkCandidates(ms, newlines, numContextLines) - chunkMatches := make([]ChunkMatch, 0, len(chunks)) + chunkMatches := make([]zoekt.ChunkMatch, 0, len(chunks)) for _, chunk := range chunks { - bestMatch, symbolInfo := p.candidateMatchScore(chunk.candidates, language, debug) - - ranges := make([]Range, 0, len(chunk.candidates)) + ranges := make([]zoekt.Range, 0, len(chunk.candidates)) for _, cm := range chunk.candidates { startOffset := cm.byteOffset endOffset := cm.byteOffset + cm.byteMatchSz startLine, endLine := newlines.offsetRangeToLineRange(startOffset, endOffset) - ranges = append(ranges, Range{ - Start: Location{ + ranges = append(ranges, zoekt.Range{ + Start: zoekt.Location{ ByteOffset: startOffset, LineNumber: uint32(startLine), Column: columnHelper.get(int(newlines.lineStart(startLine)), startOffset), }, - End: Location{ + End: zoekt.Location{ ByteOffset: endOffset, LineNumber: uint32(endLine), Column: columnHelper.get(int(newlines.lineStart(endLine)), endOffset), @@ -357,23 +355,13 @@ func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numConte }) } - firstLineNumber := int(chunk.firstLine) - numContextLines - if firstLineNumber < 1 { - firstLineNumber = 1 - } + firstLineNumber := max(int(chunk.firstLine)-numContextLines, 1) firstLineStart := 
newlines.lineStart(firstLineNumber) - bestLineMatch := 0 - if bestMatch.match != nil { - bestLineMatch = newlines.atOffset(bestMatch.match.byteOffset) - if debug { - bestMatch.debugScore = fmt.Sprintf("%s, (line: %d)", bestMatch.debugScore, bestLineMatch) - } - } - - chunkMatches = append(chunkMatches, ChunkMatch{ + chunkScore, symbolInfo := p.scoreChunk(chunk.candidates, language, opts) + chunkMatches = append(chunkMatches, zoekt.ChunkMatch{ Content: newlines.getLines(data, firstLineNumber, int(chunk.lastLine)+numContextLines+1), - ContentStart: Location{ + ContentStart: zoekt.Location{ ByteOffset: firstLineStart, LineNumber: uint32(firstLineNumber), Column: 1, @@ -381,9 +369,9 @@ func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numConte FileName: false, Ranges: ranges, SymbolInfo: symbolInfo, - BestLineMatch: uint32(bestLineMatch), - Score: bestMatch.score, - DebugScore: bestMatch.debugScore, + BestLineMatch: uint32(chunkScore.bestLine), + Score: chunkScore.score, + DebugScore: chunkScore.debugScore, }) } return chunkMatches @@ -405,6 +393,7 @@ type candidateChunk struct { // output invariants: if you flatten candidates the input invariant is retained. func chunkCandidates(ms []*candidateMatch, newlines newlines, numContextLines int) []candidateChunk { var chunks []candidateChunk + for _, m := range ms { startOffset := m.byteOffset endOffset := m.byteOffset + m.byteMatchSz @@ -536,10 +525,6 @@ const ( scoreKindMatch = 100.0 scoreFactorAtomMatch = 400.0 - // File-only scoring signals. For now these are also bounded ~9000 to give them - // equal weight with the query-dependent signals. - scoreFileRankFactor = 9000.0 - // Used for ordering line and chunk matches within a file. 
scoreLineOrderFactor = 1.0 @@ -612,7 +597,7 @@ func (p *contentProvider) matchesSymbol(cm *candidateMatch) bool { return ok } -func (p *contentProvider) findSymbol(cm *candidateMatch) (DocumentSection, *Symbol, bool) { +func (p *contentProvider) findSymbol(cm *candidateMatch) (DocumentSection, *zoekt.Symbol, bool) { if cm.fileName { return DocumentSection{}, nil, false } @@ -643,133 +628,6 @@ func (p *contentProvider) findSymbol(cm *candidateMatch) (DocumentSection, *Symb return sec, si, true } -// calculateTermFrequency computes the term frequency for the file match. -// Notes: -// * Filename matches count more than content matches. This mimics a common text search strategy to 'boost' matches on document titles. -// * Symbol matches also count more than content matches, to reward matches on symbol definitions. -func (p *contentProvider) calculateTermFrequency(cands []*candidateMatch, df termDocumentFrequency) map[string]int { - // Treat each candidate match as a term and compute the frequencies. For now, ignore case - // sensitivity and treat filenames and symbols the same as content. - termFreqs := map[string]int{} - for _, m := range cands { - term := string(m.substrLowered) - if m.fileName || p.matchesSymbol(m) { - termFreqs[term] += 5 - } else { - termFreqs[term]++ - } - } - - for term := range termFreqs { - df[term] += 1 - } - return termFreqs -} - -// scoredMatch holds the score information for a candidate match. -type scoredMatch struct { - score float64 - debugScore string - match *candidateMatch -} - -// candidateMatchScore scores all candidate matches and returns the best-scoring match plus its score information. -// Invariant: there should be at least one input candidate, len(ms) > 0. 
-func (p *contentProvider) candidateMatchScore(ms []*candidateMatch, language string, debug bool) (scoredMatch, []*Symbol) { - score := 0.0 - what := "" - - addScore := func(w string, s float64) { - if s != 0 && debug { - what += fmt.Sprintf("%s:%.2f, ", w, s) - } - score += s - } - - filename := p.data(true) - var symbolInfo []*Symbol - - var bestMatch scoredMatch - for i, m := range ms { - data := p.data(m.fileName) - - endOffset := m.byteOffset + m.byteMatchSz - startBoundary := m.byteOffset < uint32(len(data)) && (m.byteOffset == 0 || byteClass(data[m.byteOffset-1]) != byteClass(data[m.byteOffset])) - endBoundary := endOffset > 0 && (endOffset == uint32(len(data)) || byteClass(data[endOffset-1]) != byteClass(data[endOffset])) - - score = 0 - what = "" - - if startBoundary && endBoundary { - addScore("WordMatch", scoreWordMatch) - } else if startBoundary || endBoundary { - addScore("PartialWordMatch", scorePartialWordMatch) - } - - if m.fileName { - sep := bytes.LastIndexByte(data, '/') - startMatch := int(m.byteOffset) == sep+1 - endMatch := endOffset == uint32(len(data)) - if startMatch && endMatch { - addScore("Base", scoreBase) - } else if startMatch || endMatch { - addScore("EdgeBase", (scoreBase+scorePartialBase)/2) - } else if sep < int(m.byteOffset) { - addScore("InnerBase", scorePartialBase) - } - } else if sec, si, ok := p.findSymbol(m); ok { - startMatch := sec.Start == m.byteOffset - endMatch := sec.End == endOffset - if startMatch && endMatch { - addScore("Symbol", scoreSymbol) - } else if startMatch || endMatch { - addScore("EdgeSymbol", (scoreSymbol+scorePartialSymbol)/2) - } else { - addScore("OverlapSymbol", scorePartialSymbol) - } - - // Score based on symbol data - if si != nil { - symbolKind := ctags.ParseSymbolKind(si.Kind) - sym := sectionSlice(data, sec) - - addScore(fmt.Sprintf("kind:%s:%s", language, si.Kind), scoreSymbolKind(language, filename, sym, symbolKind)) - - // This is from a symbol tree, so we need to store the symbol - // 
information. - if m.symbol { - if symbolInfo == nil { - symbolInfo = make([]*Symbol, len(ms)) - } - // findSymbols does not hydrate in Sym. So we need to store it. - si.Sym = string(sym) - symbolInfo[i] = si - } - } - } - - // scoreWeight != 1 means it affects score - if !epsilonEqualsOne(m.scoreWeight) { - score = score * m.scoreWeight - if debug { - what += fmt.Sprintf("boost:%.2f, ", m.scoreWeight) - } - } - - if score > bestMatch.score { - bestMatch.score = score - bestMatch.debugScore = what - bestMatch.match = m - } - } - - if debug { - bestMatch.debugScore = fmt.Sprintf("score:%.2f <- %s", bestMatch.score, strings.TrimSuffix(bestMatch.debugScore, ", ")) - } - - return bestMatch, symbolInfo -} - // sectionSlice will return data[sec.Start:sec.End] but will clip Start and // End such that it won't be out of range. func sectionSlice(data []byte, sec DocumentSection) []byte { @@ -1011,29 +869,29 @@ func scoreSymbolKind(language string, filename []byte, sym []byte, kind ctags.Sy return factor * scoreKindMatch } -type matchScoreSlice []LineMatch +type matchScoreSlice []zoekt.LineMatch func (m matchScoreSlice) Len() int { return len(m) } func (m matchScoreSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] } func (m matchScoreSlice) Less(i, j int) bool { return m[i].Score > m[j].Score } -type chunkMatchScoreSlice []ChunkMatch +type chunkMatchScoreSlice []zoekt.ChunkMatch func (m chunkMatchScoreSlice) Len() int { return len(m) } func (m chunkMatchScoreSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] } func (m chunkMatchScoreSlice) Less(i, j int) bool { return m[i].Score > m[j].Score } -type fileMatchesByScore []FileMatch +type fileMatchesByScore []zoekt.FileMatch func (m fileMatchesByScore) Len() int { return len(m) } func (m fileMatchesByScore) Swap(i, j int) { m[i], m[j] = m[j], m[i] } func (m fileMatchesByScore) Less(i, j int) bool { return m[i].Score > m[j].Score } -func sortMatchesByScore(ms []LineMatch) { +func sortMatchesByScore(ms []zoekt.LineMatch) { 
sort.Sort(matchScoreSlice(ms)) } -func sortChunkMatchesByScore(ms []ChunkMatch) { +func sortChunkMatchesByScore(ms []zoekt.ChunkMatch) { sort.Sort(chunkMatchScoreSlice(ms)) } @@ -1044,14 +902,14 @@ func sortChunkMatchesByScore(ms []ChunkMatch) { // // We don't only use the scores, we will also boost some results to present // files with novel extensions. -func SortFiles(ms []FileMatch) { +func SortFiles(ms []zoekt.FileMatch) { sort.Sort(fileMatchesByScore(ms)) // Boost a file extension not in the top 3 to the third filematch. boostNovelExtension(ms, 2, 0.9) } -func boostNovelExtension(ms []FileMatch, boostOffset int, minScoreRatio float64) { +func boostNovelExtension(ms []zoekt.FileMatch, boostOffset int, minScoreRatio float64) { if len(ms) <= boostOffset+1 { return } diff --git a/contentprovider_test.go b/index/contentprovider_test.go similarity index 99% rename from contentprovider_test.go rename to index/contentprovider_test.go index 7a4024b5a..12045a40c 100644 --- a/contentprovider_test.go +++ b/index/contentprovider_test.go @@ -1,4 +1,4 @@ -package zoekt +package index import ( "bytes" diff --git a/build/ctags.go b/index/ctags.go similarity index 93% rename from build/ctags.go rename to index/ctags.go index 500daf78d..d87b15bc9 100644 --- a/build/ctags.go +++ b/index/ctags.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package build +package index import ( "bytes" @@ -24,7 +24,7 @@ import ( "time" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/ctags" + "github.com/sourcegraph/zoekt/internal/ctags" ) // Make sure all names are lowercase here, since they are normalized @@ -41,7 +41,7 @@ func normalizeLanguage(filetype string) string { return normalized } -func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserBins ctags.ParserBinMap) error { +func parseSymbols(todo []*Document, languageMap ctags.LanguageMap, parserBins ctags.ParserBinMap) error { monitor := newMonitor() defer monitor.Stop() @@ -55,8 +55,6 @@ func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserB continue } - zoekt.DetermineLanguageIfUnknown(doc) - parserType := languageMap[normalizeLanguage(doc.Language)] if parserType == ctags.NoCTags { continue @@ -92,7 +90,7 @@ func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserB // overlaps finds the proper position to insert a zoekt.DocumentSection with // "start and "end" into "symOffsets". It returns -1 if the new section overlaps // with one of the existing ones. -func overlaps(symOffsets []zoekt.DocumentSection, start, end uint32) int { +func overlaps(symOffsets []DocumentSection, start, end uint32) int { i := 0 for i = len(symOffsets) - 1; i >= 0; i-- { // The most common case is that we exit here, because symOffsets is sorted by @@ -120,9 +118,9 @@ type tagsToSections struct { // corresponding metadata (zoekt.Symbol). // // This can not be called concurrently. 
-func (t *tagsToSections) Convert(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSection, []*zoekt.Symbol, error) { +func (t *tagsToSections) Convert(content []byte, tags []*ctags.Entry) ([]DocumentSection, []*zoekt.Symbol, error) { nls := t.newLinesIndices(content) - symOffsets := make([]zoekt.DocumentSection, 0, len(tags)) + symOffsets := make([]DocumentSection, 0, len(tags)) symMetaData := make([]*zoekt.Symbol, 0, len(tags)) for _, t := range tags { @@ -162,7 +160,7 @@ func (t *tagsToSections) Convert(content []byte, tags []*ctags.Entry) ([]zoekt.D continue } - symOffsets = slices.Insert(symOffsets, i, zoekt.DocumentSection{ + symOffsets = slices.Insert(symOffsets, i, DocumentSection{ Start: start, End: endSym, }) @@ -242,7 +240,7 @@ func newMonitor() *monitor { return m } -func (m *monitor) BeginParsing(doc *zoekt.Document) { +func (m *monitor) BeginParsing(doc *Document) { now := time.Now() m.mu.Lock() m.lastUpdate = now diff --git a/build/ctags_test.go b/index/ctags_test.go similarity index 85% rename from build/ctags_test.go rename to index/ctags_test.go index 185322982..3d950f33c 100644 --- a/build/ctags_test.go +++ b/index/ctags_test.go @@ -12,15 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package build +package index import ( "os" "reflect" "testing" - "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/ctags" + "github.com/sourcegraph/zoekt/internal/ctags" ) func TestTagsToSections(t *testing.T) { @@ -64,7 +63,7 @@ func TestTagsToSectionsMultiple(t *testing.T) { t.Fatal("tagsToSections", err) } - want := []zoekt.DocumentSection{ + want := []DocumentSection{ {Start: 16, End: 17}, {Start: 23, End: 24}, } @@ -97,7 +96,7 @@ func TestTagsToSectionsReverse(t *testing.T) { t.Fatal("tagsToSections", err) } - want := []zoekt.DocumentSection{ + want := []DocumentSection{ {Start: 15, End: 18}, {Start: 20, End: 23}, {Start: 26, End: 29}, @@ -152,7 +151,7 @@ func TestTagsToSectionsEOF(t *testing.T) { func TestOverlaps(t *testing.T) { tests := []struct { - documentSections []zoekt.DocumentSection + documentSections []DocumentSection start uint32 end uint32 pos int @@ -161,37 +160,37 @@ func TestOverlaps(t *testing.T) { // overlap // { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}}, start: 6, end: 9, pos: -1, }, { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}}, start: 6, end: 12, pos: -1, }, { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}}, start: 4, end: 9, pos: -1, }, { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}}, start: 1, end: 9, pos: -1, }, { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}}, start: 0, end: 25, pos: -1, }, { - documentSections: []zoekt.DocumentSection{{0, 3}}, + documentSections: []DocumentSection{{0, 3}}, start: 0, end: 1, pos: -1, @@ -200,37 +199,37 @@ func TestOverlaps(t *testing.T) { // NO overlap // { - documentSections: []zoekt.DocumentSection{{2, 3}, {5, 10}}, + documentSections: 
[]DocumentSection{{2, 3}, {5, 10}}, start: 0, end: 2, pos: 0, }, { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}}, start: 3, end: 4, pos: 1, }, { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}}, start: 3, end: 5, pos: 1, }, { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}}, start: 11, end: 14, pos: 2, }, { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}, {14, 15}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}, {14, 15}}, start: 11, end: 13, pos: 2, }, { - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}, {14, 15}}, + documentSections: []DocumentSection{{0, 3}, {5, 10}, {14, 15}}, start: 18, end: 19, pos: 3, diff --git a/index/docmatchtreecache.go b/index/docmatchtreecache.go new file mode 100644 index 000000000..e9c2c441a --- /dev/null +++ b/index/docmatchtreecache.go @@ -0,0 +1,64 @@ +package index + +import ( + "os" + "strconv" + "sync" +) + +// docMatchTreeCache is a cache for docMatchTrees with random eviction. +type docMatchTreeCache struct { + maxEntries int + cache map[docMatchTreeCacheKey]*docMatchTree + mu sync.RWMutex +} + +type docMatchTreeCacheKey struct { + field string + value string +} + +// newDocMatchTreeCache creates a new docMatchTreeCache. +// If cacheSize is 0, the value from the ZOEKT_DOCMATCHTREE_CACHE environment +// variable will be used if it is present. 
+func newDocMatchTreeCache(cacheSize int) *docMatchTreeCache { + if v := os.Getenv("ZOEKT_DOCMATCHTREE_CACHE"); cacheSize == 0 && v != "" { + var err error + cacheSize, err = strconv.Atoi(v) + if err != nil { + cacheSize = 0 + } + } + return &docMatchTreeCache{ + maxEntries: cacheSize, + cache: make(map[docMatchTreeCacheKey]*docMatchTree), + } +} + +func (c *docMatchTreeCache) Get(field, value string) (*docMatchTree, bool) { + c.mu.RLock() + defer c.mu.RUnlock() + k := docMatchTreeCacheKey{field, value} + mt, ok := c.cache[k] + return mt, ok +} + +func (c *docMatchTreeCache) Add(field, value string, mt *docMatchTree) { + if c.maxEntries == 0 { + return + } + c.mu.Lock() + defer c.mu.Unlock() + k := docMatchTreeCacheKey{field, value} + c.cache[k] = mt + if len(c.cache) > c.maxEntries { + c.evictRandom() + } +} + +func (c *docMatchTreeCache) evictRandom() { + for k := range c.cache { + delete(c.cache, k) + break + } +} diff --git a/index/docmatchtreecache_test.go b/index/docmatchtreecache_test.go new file mode 100644 index 000000000..c2d3aa10e --- /dev/null +++ b/index/docmatchtreecache_test.go @@ -0,0 +1,98 @@ +package index + +import ( + "strconv" + "testing" +) + +func TestDocMatchTreeCache_Basic(t *testing.T) { + cache := newDocMatchTreeCache(2) + + mt1 := &docMatchTree{} + mt2 := &docMatchTree{} + mt3 := &docMatchTree{} + + // Add and Get + cache.Add("f1", "v1", mt1) + cache.Add("f2", "v2", mt2) + if v, ok := cache.Get("f1", "v1"); !ok || v != mt1 { + t.Errorf("expected mt1, got %v", v) + } + if v, ok := cache.Get("f2", "v2"); !ok || v != mt2 { + t.Errorf("expected mt2, got %v", v) + } + + // Add triggers eviction (random, so one of the two should be evicted) + cache.Add("f3", "v3", mt3) + v1, ok1 := cache.Get("f1", "v1") + v2, ok2 := cache.Get("f2", "v2") + v3, ok3 := cache.Get("f3", "v3") + + // Should have exactly 2 items + present := 0 + if ok1 { + present++ + if v1 != mt1 { + t.Errorf("expected mt1, got %v", v1) + } + } + if ok2 { + present++ + if v2 != mt2 
{ + t.Errorf("expected mt2, got %v", v2) + } + } + if ok3 { + present++ + if v3 != mt3 { + t.Errorf("expected mt3, got %v", v3) + } + } + if present != 2 { + t.Errorf("expected exactly 2 items in cache, got %d", present) + } +} + +func TestDocMatchTreeCache_Concurrent(t *testing.T) { + cache := newDocMatchTreeCache(100) + + // Create some test data + trees := make([]*docMatchTree, 50) + for i := range trees { + trees[i] = &docMatchTree{} + } + + // Start multiple goroutines doing concurrent reads and writes + const numGoroutines = 10 + const numOperations = 1000 + + done := make(chan bool, numGoroutines) + + // Reader goroutines (should be majority of operations) + for i := 0; i < numGoroutines-1; i++ { + go func(id int) { + for j := 0; j < numOperations; j++ { + field := "field" + strconv.Itoa(j%10) + value := "value" + strconv.Itoa(j%20) + cache.Get(field, value) + } + done <- true + }(i) + } + + // Writer goroutine (fewer write operations) + go func() { + for j := 0; j < numOperations/10; j++ { + field := "field" + strconv.Itoa(j%10) + value := "value" + strconv.Itoa(j%20) + tree := trees[j%len(trees)] + cache.Add(field, value, tree) + } + done <- true + }() + + // Wait for all goroutines to complete + for i := 0; i < numGoroutines; i++ { + <-done + } +} diff --git a/index/document.go b/index/document.go new file mode 100644 index 000000000..25efa461e --- /dev/null +++ b/index/document.go @@ -0,0 +1,53 @@ +package index + +import "github.com/sourcegraph/zoekt" + +// Document holds a document (file) to index. +type Document struct { + Name string + Content []byte + Branches []string + SubRepositoryPath string + Language string + Category FileCategory + + SkipReason SkipReason + + // Document sections for symbols. Offsets should use bytes. 
+ Symbols []DocumentSection + SymbolsMetaData []*zoekt.Symbol +} + +type SkipReason int + +const ( + SkipReasonNone SkipReason = iota + SkipReasonTooLarge + SkipReasonTooSmall + SkipReasonBinary + SkipReasonTooManyTrigrams + SkipReasonMissing +) + +func (s SkipReason) explanation() string { + switch s { + case SkipReasonNone: + return "" + case SkipReasonTooLarge: + return "exceeds the maximum size limit" + case SkipReasonTooSmall: + return "contains too few trigrams" + case SkipReasonBinary: + return "contains binary content" + case SkipReasonTooManyTrigrams: + return "contains too many trigrams" + case SkipReasonMissing: + return "object missing from repository" + default: + return "unknown skip reason" + } +} + +type DocumentSection struct { + Start, End uint32 +} diff --git a/eval.go b/index/eval.go similarity index 81% rename from eval.go rename to index/eval.go index c54070f25..16b9adfda 100644 --- a/eval.go +++ b/index/eval.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "context" @@ -26,6 +26,7 @@ import ( enry_data "github.com/go-enry/go-enry/v2/data" "github.com/grafana/regexp" + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/internal/tenant" "github.com/sourcegraph/zoekt/query" ) @@ -33,7 +34,7 @@ import ( // simplifyMultiRepo takes a query and a predicate. It returns Const(true) if all // repository names fulfill the predicate, Const(false) if none of them do, and q // otherwise. 
-func (d *indexData) simplifyMultiRepo(q query.Q, predicate func(*Repository) bool) query.Q { +func (d *indexData) simplifyMultiRepo(q query.Q, predicate func(*zoekt.Repository) bool) query.Q { count := 0 alive := len(d.repoMetaData) for i := range d.repoMetaData { @@ -56,11 +57,11 @@ func (d *indexData) simplify(in query.Q) query.Q { eval := query.Map(in, func(q query.Q) query.Q { switch r := q.(type) { case *query.Repo: - return d.simplifyMultiRepo(q, func(repo *Repository) bool { + return d.simplifyMultiRepo(q, func(repo *zoekt.Repository) bool { return r.Regexp.MatchString(repo.Name) }) case *query.RepoRegexp: - return d.simplifyMultiRepo(q, func(repo *Repository) bool { + return d.simplifyMultiRepo(q, func(repo *zoekt.Repository) bool { return r.Regexp.MatchString(repo.Name) }) case *query.BranchesRepos: @@ -73,13 +74,13 @@ func (d *indexData) simplify(in query.Q) query.Q { } return &query.Const{Value: false} case *query.RepoSet: - return d.simplifyMultiRepo(q, func(repo *Repository) bool { + return d.simplifyMultiRepo(q, func(repo *zoekt.Repository) bool { return r.Set[repo.Name] }) case query.RawConfig: - return d.simplifyMultiRepo(q, func(repo *Repository) bool { return uint8(r)&encodeRawConfig(repo.RawConfig) == uint8(r) }) + return d.simplifyMultiRepo(q, func(repo *zoekt.Repository) bool { return uint8(r)&encodeRawConfig(repo.RawConfig) == uint8(r) }) case *query.RepoIDs: - return d.simplifyMultiRepo(q, func(repo *Repository) bool { + return d.simplifyMultiRepo(q, func(repo *zoekt.Repository) bool { return r.Repos.Contains(repo.ID) }) case *query.Language: @@ -117,31 +118,31 @@ func (d *indexData) simplify(in query.Q) query.Q { if !has { return &query.Const{Value: false} } + case *query.Meta: + return d.simplifyMultiRepo(q, func(repo *zoekt.Repository) bool { + if repo.Metadata == nil { + return false + } + v, ok := repo.Metadata[r.Field] + if !ok { + return false + } + return r.Value.MatchString(v) + }) } return q }) return query.Simplify(eval) } -func 
(o *SearchOptions) SetDefaults() { - if o.ShardMaxMatchCount == 0 { - // We cap the total number of matches, so overly broad - // searches don't crash the machine. - o.ShardMaxMatchCount = 100000 - } - if o.TotalMaxMatchCount == 0 { - o.TotalMaxMatchCount = 10 * o.ShardMaxMatchCount - } -} - -func (d *indexData) Search(ctx context.Context, q query.Q, opts *SearchOptions) (sr *SearchResult, err error) { +func (d *indexData) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (sr *zoekt.SearchResult, err error) { timer := newTimer() copyOpts := *opts opts = &copyOpts opts.SetDefaults() - var res SearchResult + var res zoekt.SearchResult if len(d.fileNameIndex) == 0 { return &res, nil } @@ -200,12 +201,6 @@ func (d *indexData) Search(ctx context.Context, q query.Q, opts *SearchOptions) docCount := uint32(len(d.fileBranchMasks)) lastDoc := int(-1) - // document frequency per term - df := make(termDocumentFrequency) - - // term frequency per file match - var tfs []termFrequency - nextFileMatch: for { canceled := false @@ -293,10 +288,10 @@ nextFileMatch: } } - fileMatch := FileMatch{ + fileMatch := zoekt.FileMatch{ Repository: md.Name, RepositoryID: md.ID, - RepositoryPriority: md.priority, + RepositoryPriority: md.GetPriority(), FileName: string(d.fileName(nextDoc)), Checksum: d.getChecksum(nextDoc), Language: d.languageMap[d.getLanguage(nextDoc)], @@ -323,23 +318,16 @@ nextFileMatch: // Important invariant for performance: finalCands is sorted by offset and // non-overlapping. gatherMatches respects this invariant and all later // transformations respect this.
- shouldMergeMatches := !opts.ChunkMatches - finalCands := d.gatherMatches(nextDoc, mt, known, shouldMergeMatches) + finalCands := d.gatherMatches(nextDoc, mt, known) if opts.ChunkMatches { - fileMatch.ChunkMatches = cp.fillChunkMatches(finalCands, opts.NumContextLines, fileMatch.Language, opts.DebugScore) + fileMatch.ChunkMatches = cp.fillChunkMatches(finalCands, opts.NumContextLines, fileMatch.Language, opts) } else { - fileMatch.LineMatches = cp.fillMatches(finalCands, opts.NumContextLines, fileMatch.Language, opts.DebugScore) + fileMatch.LineMatches = cp.fillMatches(finalCands, opts.NumContextLines, fileMatch.Language, opts) } - var tf map[string]int if opts.UseBM25Scoring { - // For BM25 scoring, the calculation of the score is split in two parts. Here we - // calculate the term frequencies for the current document and update the - // document frequencies. Since we don't store document frequencies in the index, - // we have to defer the calculation of the final BM25 score to after the whole - // shard has been processed. - tf = cp.calculateTermFrequency(finalCands, df) + d.scoreFileBM25(&fileMatch, nextDoc, finalCands, cp, opts) } else { // Use the standard, non-experimental scoring method by default d.scoreFile(&fileMatch, nextDoc, mt, known, opts) @@ -360,13 +348,6 @@ nextFileMatch: repoMatchCount += len(fileMatch.LineMatches) repoMatchCount += matchedChunkRanges - if opts.UseBM25Scoring { - // Invariant: tfs[i] belongs to res.Files[i] - tfs = append(tfs, termFrequency{ - doc: nextDoc, - tf: tf, - }) - } res.Files = append(res.Files, fileMatch) res.Stats.MatchCount += len(fileMatch.LineMatches) @@ -374,14 +355,6 @@ nextFileMatch: res.Stats.FileCount++ } - // Calculate BM25 score for all file matches in the shard. We assume that we - // have seen all documents containing any of the terms in the query so that df - // correctly reflects the document frequencies. This is true, for example, if - // all terms in the query are ORed together. 
- if opts.UseBM25Scoring { - d.scoreFilesUsingBM25(res.Files, tfs, df, opts) - } - for _, md := range d.repoMetaData { r := md addRepo(&res, &r) @@ -398,7 +371,7 @@ nextFileMatch: return &res, nil } -func addRepo(res *SearchResult, repo *Repository) { +func addRepo(res *zoekt.SearchResult, repo *zoekt.Repository) { if res.RepoURLs == nil { res.RepoURLs = map[string]string{} } @@ -415,9 +388,9 @@ func addRepo(res *SearchResult, repo *Repository) { // returned, with filename matches first. // // If `merge` is set, overlapping and adjacent matches will be merged -// into a single match. Otherwise, overlapping matches will be removed, +// into a single match. Otherwise, overlapping matches will be removed, // but adjacent matches will remain. -func (d *indexData) gatherMatches(nextDoc uint32, mt matchTree, known map[matchTree]bool, merge bool) []*candidateMatch { +func (d *indexData) gatherMatches(nextDoc uint32, mt matchTree, known map[matchTree]bool) []*candidateMatch { var cands []*candidateMatch visitMatches(mt, known, 1, func(mt matchTree, scoreWeight float64) { if smt, ok := mt.(*substrMatchTree); ok { @@ -449,9 +422,10 @@ func (d *indexData) gatherMatches(nextDoc uint32, mt matchTree, known map[matchT }} } + // Remove overlapping candidates. This guarantees that the matches + // are non-overlapping, but also preserves expected match counts. sort.Sort((sortByOffsetSlice)(cands)) res := cands[:0] - mergeRun := 1 for i, c := range cands { if i == 0 { res = append(res, c) @@ -466,39 +440,12 @@ func (d *indexData) gatherMatches(nextDoc uint32, mt matchTree, known map[matchT continue } - if merge { - // Merge adjacent candidates. This guarantees that the matches - // are non-overlapping. - lastEnd := last.byteOffset + last.byteMatchSz - end := c.byteOffset + c.byteMatchSz - if lastEnd >= c.byteOffset { - mergeRun++ - // Average out the score across the merged candidates. Only do it if - // we are boosting to avoid floating point funkiness in the normal - // case.
- if !(epsilonEqualsOne(last.scoreWeight) && epsilonEqualsOne(c.scoreWeight)) { - last.scoreWeight = ((last.scoreWeight * float64(mergeRun-1)) + c.scoreWeight) / float64(mergeRun) - } - - // latest candidate goes further, update our end - if end > lastEnd { - last.byteMatchSz = end - last.byteOffset - } - - continue - } else { - mergeRun = 1 - } - } else { - // Remove overlapping candidates. This guarantees that the matches - // are non-overlapping, but also preserves expected match counts. - lastEnd := last.byteOffset + last.byteMatchSz - if lastEnd > c.byteOffset { - continue - } + // Only add the match if its range doesn't overlap + lastEnd := last.byteOffset + last.byteMatchSz + if lastEnd <= c.byteOffset { + res = append(res, c) + continue } - - res = append(res, c) } return res } @@ -562,7 +509,7 @@ func (d *indexData) gatherBranches(docID uint32, mt matchTree, known map[matchTr } var branches []string - id := uint32(1) + id := uint64(1) branchNames := d.branchNames[d.repos[docID]] for mask != 0 { if mask&0x1 != 0 { @@ -575,19 +522,19 @@ func (d *indexData) gatherBranches(docID uint32, mt matchTree, known map[matchTr return branches } -func (d *indexData) List(ctx context.Context, q query.Q, opts *ListOptions) (rl *RepoList, err error) { - var include func(rle *RepoListEntry) bool +func (d *indexData) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (rl *zoekt.RepoList, err error) { + var include func(rle *zoekt.RepoListEntry) bool q = d.simplify(q) if c, ok := q.(*query.Const); ok { if !c.Value { - return &RepoList{}, nil + return &zoekt.RepoList{}, nil } - include = func(rle *RepoListEntry) bool { + include = func(rle *zoekt.RepoListEntry) bool { return true } } else { - sr, err := d.Search(ctx, q, &SearchOptions{ + sr, err := d.Search(ctx, q, &zoekt.SearchOptions{ ShardRepoMaxMatchCount: 1, }) if err != nil { @@ -599,23 +546,23 @@ func (d *indexData) List(ctx context.Context, q query.Q, opts *ListOptions) (rl foundRepos[file.Repository] = 
struct{}{} } - include = func(rle *RepoListEntry) bool { + include = func(rle *zoekt.RepoListEntry) bool { _, ok := foundRepos[rle.Repository.Name] return ok } } - var l RepoList + var l zoekt.RepoList field, err := opts.GetField() if err != nil { return nil, err } switch field { - case RepoListFieldRepos: - l.Repos = make([]*RepoListEntry, 0, len(d.repoListEntry)) - case RepoListFieldReposMap: - l.ReposMap = make(ReposMap, len(d.repoListEntry)) + case zoekt.RepoListFieldRepos: + l.Repos = make([]*zoekt.RepoListEntry, 0, len(d.repoListEntry)) + case zoekt.RepoListFieldReposMap: + l.ReposMap = make(zoekt.ReposMap, len(d.repoListEntry)) } for i := range d.repoListEntry { @@ -641,10 +588,10 @@ func (d *indexData) List(ctx context.Context, q query.Q, opts *ListOptions) (rl } switch field { - case RepoListFieldRepos: + case zoekt.RepoListFieldRepos: l.Repos = append(l.Repos, rle) - case RepoListFieldReposMap: - l.ReposMap[rle.Repository.ID] = MinimalRepoListEntry{ + case zoekt.RepoListFieldReposMap: + l.ReposMap[rle.Repository.ID] = zoekt.MinimalRepoListEntry{ HasSymbols: rle.Repository.HasSymbols, Branches: rle.Repository.Branches, IndexTimeUnix: rle.IndexMetadata.IndexTime.Unix(), diff --git a/eval_test.go b/index/eval_test.go similarity index 80% rename from eval_test.go rename to index/eval_test.go index 36e78ed9d..17ec39829 100644 --- a/eval_test.go +++ b/index/eval_test.go @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package zoekt +package index import ( "context" "hash/fnv" "reflect" "regexp/syntax" + "strconv" "strings" "testing" @@ -27,6 +28,7 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "github.com/grafana/regexp" + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/query" ) @@ -154,7 +156,7 @@ func TestSearch_ShardRepoMaxMatchCountOpt(t *testing.T) { ctx := context.Background() q := &query.Const{Value: true} - opts := &SearchOptions{ShardRepoMaxMatchCount: 1} + opts := &zoekt.SearchOptions{ShardRepoMaxMatchCount: 1} sr, err := cs.Search(ctx, q, opts) if err != nil { @@ -174,7 +176,7 @@ func TestSearch_ShardRepoMaxMatchCountOpt(t *testing.T) { }) t.Run("stats", func(t *testing.T) { - got, want := sr.Stats, Stats{ + got, want := sr.Stats, zoekt.Stats{ ContentBytesLoaded: 0, FileCount: 2, FilesConsidered: 2, @@ -182,7 +184,7 @@ func TestSearch_ShardRepoMaxMatchCountOpt(t *testing.T) { ShardsScanned: 1, MatchCount: 2, } - if diff := cmp.Diff(want, got, cmpopts.IgnoreFields(Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { + if diff := cmp.Diff(want, got, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { t.Errorf("mismatch (-want, +got): %s", diff) } }) @@ -191,10 +193,10 @@ func TestSearch_ShardRepoMaxMatchCountOpt(t *testing.T) { func compoundReposShard(t *testing.T, names ...string) *indexData { t.Helper() - repos := make([]*Repository, 0, len(names)) + repos := make([]*zoekt.Repository, 0, len(names)) docs := make([][]Document, 0, len(names)) for _, name := range names { - repos = append(repos, &Repository{ID: hash(name), Name: name}) + repos = append(repos, &zoekt.Repository{ID: hash(name), Name: name}) ds := []Document{ {Name: name + ".txt", Content: []byte(name + " content")}, {Name: name + ".2.txt", Content: []byte(name + " content 2")}, @@ -202,7 +204,7 @@ func compoundReposShard(t *testing.T, names ...string) *indexData { docs = append(docs, ds) } - b := testIndexBuilderCompound(t, repos, 
docs) + b := testShardBuilderCompound(t, repos, docs) s := searcherForTest(t, b) return s.(*indexData) } @@ -375,8 +377,8 @@ func hash(name string) uint32 { func TestGatherBranches(t *testing.T) { content := []byte("dummy") - b := testIndexBuilder(t, &Repository{ - Branches: []RepositoryBranch{ + b := testShardBuilder(t, &zoekt.Repository{ + Branches: []zoekt.RepositoryBranch{ {"foo", "v1"}, {"foo-2", "v1"}, {"main", "v1"}, @@ -396,7 +398,7 @@ func TestGatherBranches(t *testing.T) { &query.Branch{Pattern: "foo"}, &query.Branch{Pattern: "quz"}, }}, - &SearchOptions{}, + &zoekt.SearchOptions{}, ) if err != nil { t.Fatal(err) @@ -417,3 +419,72 @@ func TestGatherBranches(t *testing.T) { } } } + +func TestGatherBranchesMany(t *testing.T) { + content := []byte("dummy") + manyBranchNames := []string{} + manyBranches := []zoekt.RepositoryBranch{} + for i := range 64 { + branchName := "branch-" + strconv.Itoa(i) + manyBranchNames = append(manyBranchNames, branchName) + manyBranches = append(manyBranches, zoekt.RepositoryBranch{ + Name: branchName, + Version: "v1"}) + } + b := testShardBuilder(t, &zoekt.Repository{ + Branches: manyBranches, + }, Document{Name: "f1", Content: content, Branches: manyBranchNames}) + + d := searcherForTest(t, b).(*indexData) + + sr, err := d.Search( + context.Background(), + &query.Substring{ + Pattern: "dummy", + CaseSensitive: false, + }, + &zoekt.SearchOptions{}, + ) + if err != nil { + t.Fatal(err) + } + + want := map[string][]string{ + "f1": manyBranchNames, + } + + if len(sr.Files) != 1 { + t.Fatalf("len(sr.Files): want %d, got %d", 1, len(sr.Files)) + } + + for _, f := range sr.Files { + if d := cmp.Diff(want[f.FileName], f.Branches); d != "" { + t.Fatalf("-want,+got:\n%s", d) + } + } +} + +func TestSimplifyMeta(t *testing.T) { + re := regexp.MustCompile("^stable$") + d := compoundReposShard(t, "foo", "bar") + + // Inject metadata into the fake repos + d.repoMetaData[0].Metadata = map[string]string{"release": "stable"} + 
d.repoMetaData[1].Metadata = map[string]string{"release": "beta"} + + all := &query.Meta{Field: "release", Value: regexp.MustCompile(".*")} + some := &query.Meta{Field: "release", Value: re} + none := &query.Meta{Field: "release", Value: regexp.MustCompile("^nonexistent$")} + + if got := d.simplify(all); !reflect.DeepEqual(got, &query.Const{Value: true}) { + t.Errorf("simplify(all): got %v, want Const(true)", got) + } + + if got := d.simplify(some); got != some { + t.Errorf("simplify(some): got %v, want unchanged", got) + } + + if got := d.simplify(none); !reflect.DeepEqual(got, &query.Const{Value: false}) { + t.Errorf("simplify(none): got %v, want Const(false)", got) + } +} diff --git a/index/file_category.go b/index/file_category.go new file mode 100644 index 000000000..fa365ccd0 --- /dev/null +++ b/index/file_category.go @@ -0,0 +1,115 @@ +package index + +import ( + "errors" + + "github.com/go-enry/go-enry/v2" +) + +// FileCategory represents the category of a file, as determined by go-enry. It is non-exhaustive +// but tries to cover the major cases like whether the file is a test, generated, etc. +// +// A file's category is used in search scoring to determine the weight of a file match. +type FileCategory byte + +const ( + // FileCategoryMissing is a sentinel value that indicates we never computed the file category during indexing + // (which means we're reading from an old index version). This value can never be written to the index.
+ FileCategoryMissing FileCategory = iota + FileCategoryDefault + FileCategoryTest + FileCategoryVendored + FileCategoryGenerated + FileCategoryConfig + FileCategoryDotFile + FileCategoryBinary + FileCategoryDocumentation +) + +func DetermineFileCategory(doc *Document) { + if doc.SkipReason == SkipReasonBinary { + doc.Category = FileCategoryBinary + return + } + + name := doc.Name + content := doc.Content + + // If this document was skipped (too large, binary, or missing from the repo), + // guess the category based on the filename to avoid examining the contents. + // Note: passing nil content is allowed by the go-enry contract. + if doc.SkipReason == SkipReasonTooLarge || doc.SkipReason == SkipReasonBinary || doc.SkipReason == SkipReasonMissing { + content = nil + } + + category := FileCategoryDefault + if enry.IsTest(name) { + category = FileCategoryTest + } else if enry.IsDotFile(name) { + category = FileCategoryDotFile + } else if enry.IsVendor(name) { + category = FileCategoryVendored + } else if enry.IsGenerated(name, content) { + category = FileCategoryGenerated + } else if enry.IsConfiguration(name) { + category = FileCategoryConfig + } else if enry.IsDocumentation(name) { + category = FileCategoryDocumentation + } + + doc.Category = category +} + +// lowPriority returns true if this file category is considered 'low priority'. This is used +// in search scoring to down-weight matches in these files. 
+func (c FileCategory) lowPriority() bool { + return c == FileCategoryTest || c == FileCategoryVendored || c == FileCategoryGenerated || c == FileCategoryBinary +} + +func (c FileCategory) encode() (byte, error) { + switch c { + case FileCategoryMissing: + return 0, errors.New("cannot encode missing file category") + case FileCategoryDefault: + return 1, nil + case FileCategoryTest: + return 2, nil + case FileCategoryVendored: + return 3, nil + case FileCategoryGenerated: + return 4, nil + case FileCategoryConfig: + return 5, nil + case FileCategoryDotFile: + return 6, nil + case FileCategoryDocumentation: + return 7, nil + case FileCategoryBinary: + return 8, nil + default: + return 0, errors.New("unrecognized file category") + } +} + +func decodeCategory(c byte) (FileCategory, error) { + switch c { + case 1: + return FileCategoryDefault, nil + case 2: + return FileCategoryTest, nil + case 3: + return FileCategoryVendored, nil + case 4: + return FileCategoryGenerated, nil + case 5: + return FileCategoryConfig, nil + case 6: + return FileCategoryDotFile, nil + case 7: + return FileCategoryDocumentation, nil + case 8: + return FileCategoryBinary, nil + default: + return FileCategoryMissing, errors.New("unrecognized file category") + } +} diff --git a/index/file_category_test.go b/index/file_category_test.go new file mode 100644 index 000000000..72c7e8e04 --- /dev/null +++ b/index/file_category_test.go @@ -0,0 +1,72 @@ +package index + +import ( + "testing" +) + +func TestDetermineFileCategory(t *testing.T) { + tests := []struct { + name string + filename string + content []byte + want FileCategory + }{ + { + name: "test file", + filename: "foo_test.go", + content: []byte("package foo"), + want: FileCategoryTest, + }, + { + name: "vendor file", + filename: "vendor/foo.go", + content: []byte("package foo"), + want: FileCategoryVendored, + }, + { + name: "generated file", + filename: "foo.go", + content: []byte("// Code generated by protoc-gen-go. 
DO NOT EDIT.\n" + + "... some generated code ..."), + want: FileCategoryGenerated, + }, + { + name: "config file", + filename: "package.json", + content: []byte("{}"), + want: FileCategoryConfig, + }, + { + name: "dot file", + filename: ".gitignore", + content: []byte("*.o"), + want: FileCategoryDotFile, + }, + { + name: "documentation file", + filename: "README.md", + content: []byte("# Documentation"), + want: FileCategoryDocumentation, + }, + { + name: "default file", + filename: "main.go", + content: []byte("package main"), + want: FileCategoryDefault, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + doc := &Document{ + Name: tt.filename, + Content: tt.content, + } + + DetermineFileCategory(doc) + if doc.Category != tt.want { + t.Errorf("DetermineFileCategory() = %v, want %v", doc.Name, tt.want) + } + }) + } +} diff --git a/hititer.go b/index/hititer.go similarity index 88% rename from hititer.go rename to index/hititer.go index 01a58d1e1..1d63fc797 100644 --- a/hititer.go +++ b/index/hititer.go @@ -12,25 +12,28 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "encoding/binary" "fmt" + "math" + + "github.com/sourcegraph/zoekt" ) // hitIterator finds potential search matches, measured in offsets of // the concatenation of all documents. type hitIterator interface { - // Return the first hit, or maxUInt32 if none. + // Return the first hit, or math.MaxUint32 if none. first() uint32 - // Skip until past limit. The argument maxUInt32 should be + // Skip until past limit. The argument math.MaxUint32 should be // treated specially. next(limit uint32) // Return how many bytes were read. - updateStats(s *Stats) + updateStats(s *zoekt.Stats) } // distanceHitIterator looks for hits at a fixed distance apart. 
@@ -50,8 +53,8 @@ func (i *distanceHitIterator) findNext() { var p1, p2 uint32 p1 = i.i1.first() p2 = i.i2.first() - if p1 == maxUInt32 || p2 == maxUInt32 { - i.i1.next(maxUInt32) + if p1 == math.MaxUint32 || p2 == math.MaxUint32 { + i.i1.next(math.MaxUint32) break } @@ -73,7 +76,7 @@ func (i *distanceHitIterator) first() uint32 { return i.i1.first() } -func (i *distanceHitIterator) updateStats(s *Stats) { +func (i *distanceHitIterator) updateStats(s *zoekt.Stats) { i.i1.updateStats(s) i.i2.updateStats(s) } @@ -83,7 +86,7 @@ func (i *distanceHitIterator) next(limit uint32) { l2 := limit + i.distance if l2 < limit { // overflow. - l2 = maxUInt32 + l2 = math.MaxUint32 } i.i2.next(l2) i.findNext() @@ -156,14 +159,14 @@ func (i *inMemoryIterator) first() uint32 { if len(i.postings) > 0 { return i.postings[0] } - return maxUInt32 + return math.MaxUint32 } -func (i *inMemoryIterator) updateStats(s *Stats) { +func (i *inMemoryIterator) updateStats(_ *zoekt.Stats) { } func (i *inMemoryIterator) next(limit uint32) { - if limit == maxUInt32 { + if limit == math.MaxUint32 { i.postings = nil } @@ -201,9 +204,9 @@ func (i *compressedPostingIterator) first() uint32 { } func (i *compressedPostingIterator) next(limit uint32) { - if limit == maxUInt32 { + if limit == math.MaxUint32 { i.blob = nil - i._first = maxUInt32 + i._first = math.MaxUint32 return } @@ -215,11 +218,11 @@ func (i *compressedPostingIterator) next(limit uint32) { } if i._first <= limit && len(i.blob) == 0 { - i._first = maxUInt32 + i._first = math.MaxUint32 } } -func (i *compressedPostingIterator) updateStats(s *Stats) { +func (i *compressedPostingIterator) updateStats(s *zoekt.Stats) { s.IndexBytesLoaded += int64(i.indexBytesLoaded) s.NgramLookups += i.ngramLookups i.indexBytesLoaded = 0 @@ -237,7 +240,7 @@ func (i *mergingIterator) String() string { return fmt.Sprintf("merge:%v", i.iters) } -func (i *mergingIterator) updateStats(s *Stats) { +func (i *mergingIterator) updateStats(s *zoekt.Stats) { 
s.NgramLookups += i.ngramLookups i.ngramLookups = 0 for _, j := range i.iters { @@ -246,7 +249,7 @@ func (i *mergingIterator) updateStats(s *Stats) { } func (i *mergingIterator) first() uint32 { - r := uint32(maxUInt32) + r := uint32(math.MaxUint32) for _, j := range i.iters { f := j.first() if f < r { diff --git a/hititer_test.go b/index/hititer_test.go similarity index 93% rename from hititer_test.go rename to index/hititer_test.go index 0c276c129..ab40b3798 100644 --- a/hititer_test.go +++ b/index/hititer_test.go @@ -12,17 +12,20 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "fmt" "math/rand" "reflect" - "sort" "testing" "testing/quick" + "slices" + "github.com/google/go-cmp/cmp" + + "github.com/sourcegraph/zoekt" ) func TestCompressedPostingIterator_limit(t *testing.T) { @@ -32,7 +35,7 @@ func TestCompressedPostingIterator_limit(t *testing.T) { } nums = sortedUnique(nums) - sort.Slice(limits, func(i, j int) bool { return limits[i] < limits[j] }) + slices.Sort(limits) want := doHitIterator(&inMemoryIterator{postings: nums}, limits) @@ -81,7 +84,7 @@ func benchmarkCompressedPostingIterator(b *testing.B, size, limitsSize int) { limits := genUints32(limitsSize) nums = sortedUnique(nums) - sort.Slice(limits, func(i, j int) bool { return limits[i] < limits[j] }) + slices.Sort(limits) ng := stringToNGram("abc") deltas := toDeltas(nums) @@ -94,7 +97,7 @@ func benchmarkCompressedPostingIterator(b *testing.B, size, limitsSize int) { it.next(limit) _ = it.first() } - var s Stats + var s zoekt.Stats it.updateStats(&s) b.SetBytes(s.IndexBytesLoaded) } diff --git a/index_test.go b/index/index_test.go similarity index 82% rename from index_test.go rename to index/index_test.go index bdb92f5a4..642d7eff6 100644 --- a/index_test.go +++ b/index/index_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package zoekt +package index import ( "bytes" @@ -27,10 +27,11 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "github.com/grafana/regexp" + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/query" ) -func clearScores(r *SearchResult) { +func clearScores(r *zoekt.SearchResult) { for i := range r.Files { r.Files[i].Score = 0.0 for j := range r.Files[i].LineMatches { @@ -45,31 +46,31 @@ func clearScores(r *SearchResult) { } } -func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder { - t.Helper() +func testShardBuilder(tb testing.TB, repo *zoekt.Repository, docs ...Document) *ShardBuilder { + tb.Helper() - b, err := NewIndexBuilder(repo) + b, err := NewShardBuilder(repo) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + tb.Fatalf("NewShardBuilder: %v", err) } for i, d := range docs { if err := b.Add(d); err != nil { - t.Fatalf("Add %d: %v", i, err) + tb.Fatalf("Add %d: %v", i, err) } } return b } -func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder { +func testShardBuilderCompound(t *testing.T, repos []*zoekt.Repository, docs [][]Document) *ShardBuilder { t.Helper() - b := newIndexBuilder() + b := newShardBuilder(0) b.indexFormatVersion = NextIndexFormatVersion if len(repos) != len(docs) { - t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs)) + t.Fatalf("testShardBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs)) } for i, repo := range repos { @@ -87,7 +88,7 @@ func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Docume } func TestBoundary(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "f1", Content: []byte("x the")}, Document{Name: "f1", Content: []byte("reader")}) res := searchForTest(t, b, &query.Substring{Pattern: "there"}) @@ -97,9 +98,9 @@ func 
TestBoundary(t *testing.T) { } func TestDocSectionInvalid(t *testing.T) { - b, err := NewIndexBuilder(nil) + b, err := NewShardBuilder(nil) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } doc := Document{ Name: "f1", @@ -123,7 +124,7 @@ func TestDocSectionInvalid(t *testing.T) { } func TestBasic(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{ Name: "f2", Content: []byte("to carry water in the no later bla"), @@ -166,10 +167,10 @@ func TestBasic(t *testing.T) { } func TestEmptyIndex(t *testing.T) { - b := testIndexBuilder(t, nil) + b := testShardBuilder(t, nil) searcher := searcherForTest(t, b) - var opts SearchOptions + var opts zoekt.SearchOptions if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil { t.Fatalf("Search: %v", err) } @@ -201,7 +202,7 @@ func (s *memSeeker) Size() (uint32, error) { } func TestNewlines(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, // -----------------------------------------012345-678901-234 Document{Name: "filename", Content: []byte("line1\nline2\nbla")}) @@ -209,10 +210,10 @@ func TestNewlines(t *testing.T) { sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}) matches := sres.Files - want := []FileMatch{{ + want := []zoekt.FileMatch{{ FileName: "filename", - LineMatches: []LineMatch{{ - LineFragments: []LineFragmentMatch{{ + LineMatches: []zoekt.LineMatch{{ + LineFragments: []zoekt.LineFragmentMatch{{ Offset: 8, LineOffset: 2, MatchLength: 3, @@ -233,18 +234,18 @@ func TestNewlines(t *testing.T) { sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts) matches := sres.Files - want := []FileMatch{{ + want := []zoekt.FileMatch{{ FileName: "filename", - ChunkMatches: []ChunkMatch{{ + ChunkMatches: []zoekt.ChunkMatch{{ Content: []byte("line2\n"), - ContentStart: Location{ + ContentStart: zoekt.Location{ ByteOffset: 6, LineNumber: 2, Column: 1, }, - 
Ranges: []Range{{ - Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3}, - End: Location{ByteOffset: 11, LineNumber: 2, Column: 6}, + Ranges: []zoekt.Range{{ + Start: zoekt.Location{ByteOffset: 8, LineNumber: 2, Column: 3}, + End: zoekt.Location{ByteOffset: 11, LineNumber: 2, Column: 6}, }}, }}, }} @@ -259,7 +260,7 @@ func TestNewlines(t *testing.T) { // single lines. func TestQueryNewlines(t *testing.T) { text := "line1\nline2\nbla" - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "filename", Content: []byte(text)}) t.Run("LineMatches", func(t *testing.T) { @@ -287,11 +288,11 @@ func TestQueryNewlines(t *testing.T) { }) } -var chunkOpts = SearchOptions{ChunkMatches: true} +var chunkOpts = zoekt.SearchOptions{ChunkMatches: true} -func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult { +func searchForTest(t *testing.T, b *ShardBuilder, q query.Q, o ...zoekt.SearchOptions) *zoekt.SearchResult { searcher := searcherForTest(t, b) - var opts SearchOptions + var opts zoekt.SearchOptions if len(o) > 0 { opts = o[0] } @@ -303,7 +304,7 @@ func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) return res } -func searcherForTest(t *testing.T, b *IndexBuilder) Searcher { +func searcherForTest(t testing.TB, b *ShardBuilder) zoekt.Searcher { var buf bytes.Buffer if err := b.Write(&buf); err != nil { t.Fatal(err) @@ -319,7 +320,7 @@ func searcherForTest(t *testing.T, b *IndexBuilder) Searcher { } func TestCaseFold(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "f1", Content: []byte("I love BaNaNAS.")}, // -----------------------------------012345678901234 ) @@ -375,19 +376,22 @@ func TestCaseFold(t *testing.T) { func wordsAsSymbols(doc Document) Document { re := regexp.MustCompile(`\b\w{2,}\b`) var symbols []DocumentSection + var symbolsMetadata []*zoekt.Symbol for _, match := range re.FindAllIndex(doc.Content, -1) { symbols = 
append(symbols, DocumentSection{ Start: uint32(match[0]), End: uint32(match[1]), }) + symbolsMetadata = append(symbolsMetadata, &zoekt.Symbol{Kind: "method"}) } doc.Symbols = symbols + doc.SymbolsMetaData = symbolsMetadata return doc } func TestSearchStats(t *testing.T) { ctx := context.Background() - searcher := searcherForTest(t, testIndexBuilder(t, nil, + searcher := searcherForTest(t, testShardBuilder(t, nil, wordsAsSymbols(Document{Name: "f1", Content: []byte("x banana y")}), wordsAsSymbols(Document{Name: "f2", Content: []byte("x apple y")}), wordsAsSymbols(Document{Name: "f3", Content: []byte("x banana apple y")}), @@ -404,7 +408,7 @@ func TestSearchStats(t *testing.T) { ) t.Run("LineMatches", func(t *testing.T) { - sres, err := searcher.Search(ctx, andQuery, &SearchOptions{}) + sres, err := searcher.Search(ctx, andQuery, &zoekt.SearchOptions{}) if err != nil { t.Fatal(err) } @@ -435,11 +439,11 @@ func TestSearchStats(t *testing.T) { cases := []struct { Name string Q query.Q - Want Stats + Want zoekt.Stats }{{ Name: "and-query", Q: andQuery, - Want: Stats{ + Want: zoekt.Stats{ FilesLoaded: 1, ContentBytesLoaded: 22, IndexBytesLoaded: 10, @@ -457,7 +461,7 @@ func TestSearchStats(t *testing.T) { Content: true, CaseSensitive: true, }, - Want: Stats{ + Want: zoekt.Stats{ ContentBytesLoaded: 14, IndexBytesLoaded: 1, FileCount: 1, @@ -474,7 +478,7 @@ func TestSearchStats(t *testing.T) { Pattern: "a y", Content: true, }, - Want: Stats{ + Want: zoekt.Stats{ ContentBytesLoaded: 14, IndexBytesLoaded: 1, FileCount: 1, @@ -492,7 +496,7 @@ func TestSearchStats(t *testing.T) { Content: true, CaseSensitive: true, }, - Want: Stats{ + Want: zoekt.Stats{ ShardsSkippedFilter: 1, NgramLookups: 1, // only had to lookup once }, @@ -510,7 +514,7 @@ func TestSearchStats(t *testing.T) { CaseSensitive: true, }, ), - Want: Stats{ + Want: zoekt.Stats{ IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning. 
ShardsSkippedFilter: 1, NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2). @@ -522,7 +526,7 @@ func TestSearchStats(t *testing.T) { Content: true, CaseSensitive: true, }}, - Want: Stats{ + Want: zoekt.Stats{ IndexBytesLoaded: 3, FilesConsidered: 1, // important that we only check 1 file to ensure we are using the index MatchCount: 0, // even though there is a match it doesn't align with a symbol @@ -537,7 +541,7 @@ func TestSearchStats(t *testing.T) { Content: true, CaseSensitive: true, }}, - Want: Stats{ + Want: zoekt.Stats{ ContentBytesLoaded: 35, IndexBytesLoaded: 4, FileCount: 2, @@ -555,7 +559,7 @@ func TestSearchStats(t *testing.T) { Content: true, CaseSensitive: true, }}, - Want: Stats{ + Want: zoekt.Stats{ ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents IndexBytesLoaded: 10, FilesConsidered: 2, // important that we don't check 3 to ensure we are using the index @@ -572,7 +576,7 @@ func TestSearchStats(t *testing.T) { Content: true, CaseSensitive: true, }}, - Want: Stats{ + Want: zoekt.Stats{ ContentBytesLoaded: 35, IndexBytesLoaded: 2, FileCount: 2, @@ -591,7 +595,7 @@ func TestSearchStats(t *testing.T) { if err != nil { t.Fatal(err) } - if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { + if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { t.Errorf("unexpected Stats (-want +got):\n%s", diff) } }) @@ -600,7 +604,7 @@ func TestSearchStats(t *testing.T) { } func TestAndNegateSearch(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "f1", Content: []byte("x banana y")}, // -----------------------------------0123456789 Document{Name: "f4", Content: []byte("x banana apple y")}) @@ -655,7 +659,7 @@ func TestAndNegateSearch(t *testing.T) { } func TestNegativeMatchesOnlyShortcut(t 
*testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "f1", Content: []byte("x banana y")}, Document{Name: "f2", Content: []byte("x appelmoes y")}, Document{Name: "f3", Content: []byte("x appelmoes y")}, @@ -691,7 +695,7 @@ func TestNegativeMatchesOnlyShortcut(t *testing.T) { } func TestFileSearch(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "banzana", Content: []byte("x orange y")}, // -------------0123456 Document{Name: "banana", Content: []byte("x apple y")}, @@ -710,9 +714,9 @@ func TestFileSearch(t *testing.T) { } got := matches[0].LineMatches[0] - want := LineMatch{ + want := zoekt.LineMatch{ Line: []byte("banana"), - LineFragments: []LineFragmentMatch{{ + LineFragments: []zoekt.LineFragmentMatch{{ Offset: 1, LineOffset: 1, MatchLength: 4, @@ -737,12 +741,12 @@ func TestFileSearch(t *testing.T) { } got := matches[0].ChunkMatches[0] - want := ChunkMatch{ + want := zoekt.ChunkMatch{ Content: []byte("banana"), - ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, - Ranges: []Range{{ - Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2}, - End: Location{ByteOffset: 5, LineNumber: 1, Column: 6}, + ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, + Ranges: []zoekt.Range{{ + Start: zoekt.Location{ByteOffset: 1, LineNumber: 1, Column: 2}, + End: zoekt.Location{ByteOffset: 5, LineNumber: 1, Column: 6}, }}, FileName: true, } @@ -761,12 +765,12 @@ func TestFileSearch(t *testing.T) { } got := matches[0].ChunkMatches[0] - want := ChunkMatch{ + want := zoekt.ChunkMatch{ Content: []byte("banana"), - ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, - Ranges: []Range{{ - Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, - End: Location{ByteOffset: 6, LineNumber: 1, Column: 7}, + ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, + Ranges: []zoekt.Range{{ + Start: zoekt.Location{ByteOffset: 0, LineNumber: 
1, Column: 1}, + End: zoekt.Location{ByteOffset: 6, LineNumber: 1, Column: 7}, }}, FileName: true, } @@ -778,7 +782,7 @@ func TestFileSearch(t *testing.T) { } func TestFileCase(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "BANANA", Content: []byte("x orange y")}) t.Run("LineMatches", func(t *testing.T) { @@ -807,7 +811,7 @@ func TestFileCase(t *testing.T) { } func TestFileRegexpSearchBruteForce(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "banzana", Content: []byte("x orange y")}, Document{Name: "banana", Content: []byte("x apple y")}, ) @@ -836,7 +840,7 @@ func TestFileRegexpSearchBruteForce(t *testing.T) { } func TestFileRegexpSearchShortString(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "banana.py", Content: []byte("x orange y")}) t.Run("LineMatches", func(t *testing.T) { @@ -865,7 +869,7 @@ func TestFileRegexpSearchShortString(t *testing.T) { } func TestFileSubstringSearchBruteForce(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "BANZANA", Content: []byte("x orange y")}, Document{Name: "banana", Content: []byte("x apple y")}) @@ -890,7 +894,7 @@ func TestFileSubstringSearchBruteForce(t *testing.T) { } func TestFileSubstringSearchBruteForceEnd(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "BANZANA", Content: []byte("x orange y")}, Document{Name: "bananaq", Content: []byte("x apple y")}) @@ -914,7 +918,7 @@ func TestFileSubstringSearchBruteForceEnd(t *testing.T) { } func TestSearchMatchAll(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "banzana", Content: []byte("x orange y")}, Document{Name: "banana", Content: []byte("x apple y")}) @@ -936,7 +940,7 @@ func TestSearchMatchAll(t *testing.T) { } func TestSearchNewline(t *testing.T) { - b := testIndexBuilder(t, nil, + b := 
testShardBuilder(t, nil, Document{Name: "banzana", Content: []byte("abcd\ndefg")}) t.Run("LineMatches", func(t *testing.T) { @@ -963,7 +967,7 @@ func TestSearchNewline(t *testing.T) { } func TestSearchMatchAllRegexp(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "banzana", Content: []byte("abcd")}, Document{Name: "banana", Content: []byte("pqrs")}) @@ -992,8 +996,90 @@ func TestSearchMatchAllRegexp(t *testing.T) { }) } +func TestSearchBM25MatchScores(t *testing.T) { + ctx := context.Background() + searcher := searcherForTest(t, testShardBuilder(t, nil, + Document{Name: "f1", Content: []byte("one two three\naaaaaaaaaa\nbbbbbbbb\none two two")}, + Document{Name: "f2", Content: []byte("four five six\naaaaaaaaaa\nbbbbbbbb\nfour five five\nsix six")}, + wordsAsSymbols(Document{Name: "f3", Content: []byte("public static void main")}), + )) + + t.Run("LineMatches", func(t *testing.T) { + q := &query.Substring{Pattern: "two"} + sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true}) + if err != nil { + t.Fatal(err) + } + matches := sres.Files + if len(matches) != 1 { + t.Fatalf("want 1 file index, got %d", len(matches)) + } + + if len(matches[0].LineMatches) != 2 { + t.Fatalf("want 2 line matches, got %d", len(matches[0].LineMatches)) + } + + if matches[0].LineMatches[0].LineNumber != 4 { + t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].LineMatches[0].LineNumber) + } + }) + + t.Run("ChunkMatches", func(t *testing.T) { + q := &query.Substring{Pattern: "five"} + sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1}) + if err != nil { + t.Fatal(err) + } + + matches := sres.Files + if len(matches) != 1 { + t.Fatalf("want 1 file index, got %d", len(matches)) + } + + if len(matches[0].ChunkMatches) != 2 { + t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches)) + } + + if 
matches[0].ChunkMatches[0].BestLineMatch != 4 { + t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].ChunkMatches[0].BestLineMatch) + } + }) + + t.Run("ChunkMatches with symbols", func(t *testing.T) { + q := &query.Or{ + Children: []query.Q{ + &query.Symbol{Expr: &query.Substring{Pattern: "main"}}, + &query.Substring{Pattern: "five"}, + }, + } + + sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1}) + if err != nil { + t.Fatal(err) + } + + matches := sres.Files + if len(matches) != 2 { + t.Fatalf("want 2 file index, got %d", len(matches)) + } + + foundSymbolInfo := false + for _, m := range matches { + for _, cm := range m.ChunkMatches { + if len(cm.SymbolInfo) > 0 { + foundSymbolInfo = true + } + } + } + + if !foundSymbolInfo { + t.Fatalf("want symbol info, got none") + } + }) +} + func TestFileRestriction(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "banana1", Content: []byte("x orange y")}, Document{Name: "banana2", Content: []byte("x apple y")}, Document{Name: "orange", Content: []byte("x apple z")}) @@ -1046,7 +1132,7 @@ func TestFileRestriction(t *testing.T) { } func TestFileNameBoundary(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "banana2", Content: []byte("x apple y")}, Document{Name: "helpers.go", Content: []byte("x apple y")}, Document{Name: "foo", Content: []byte("x apple y")}) @@ -1078,11 +1164,11 @@ func TestFileNameBoundary(t *testing.T) { func TestDocumentOrder(t *testing.T) { var docs []Document - for i := 0; i < 3; i++ { + for i := range 3 { docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")}) } - b := testIndexBuilder(t, nil, docs...) + b := testShardBuilder(t, nil, docs...) 
t.Run("LineMatches", func(t *testing.T) { sres := searchForTest(t, b, query.NewAnd( @@ -1120,8 +1206,8 @@ func TestDocumentOrder(t *testing.T) { } func TestBranchMask(t *testing.T) { - b := testIndexBuilder(t, &Repository{ - Branches: []RepositoryBranch{ + b := testShardBuilder(t, &zoekt.Repository{ + Branches: []zoekt.RepositoryBranch{ {"master", "v-master"}, {"stable", "v-stable"}, {"bonzai", "v-bonzai"}, @@ -1173,26 +1259,26 @@ func TestBranchMask(t *testing.T) { func TestBranchLimit(t *testing.T) { for limit := 64; limit <= 65; limit++ { - r := &Repository{} - for i := 0; i < limit; i++ { + r := &zoekt.Repository{} + for i := range limit { s := fmt.Sprintf("b%d", i) - r.Branches = append(r.Branches, RepositoryBranch{ + r.Branches = append(r.Branches, zoekt.RepositoryBranch{ s, "v-" + s, }) } - _, err := NewIndexBuilder(r) + _, err := NewShardBuilder(r) if limit == 64 && err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } else if limit == 65 && err == nil { - t.Fatalf("NewIndexBuilder succeeded") + t.Fatalf("NewShardBuilder succeeded") } } } func TestBranchReport(t *testing.T) { branches := []string{"stable", "master"} - b := testIndexBuilder(t, &Repository{ - Branches: []RepositoryBranch{ + b := testShardBuilder(t, &zoekt.Repository{ + Branches: []zoekt.RepositoryBranch{ {"stable", "vs"}, {"master", "vm"}, }, @@ -1229,8 +1315,8 @@ func TestBranchReport(t *testing.T) { } func TestBranchVersions(t *testing.T) { - b := testIndexBuilder(t, &Repository{ - Branches: []RepositoryBranch{ + b := testShardBuilder(t, &zoekt.Repository{ + Branches: []zoekt.RepositoryBranch{ {"stable", "v-stable"}, {"master", "v-master"}, }, @@ -1278,7 +1364,7 @@ func TestRegexp(t *testing.T) { content := []byte("needle the bla") // ----------------01234567890123 - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{ Name: "f1", Content: content, @@ -1295,8 +1381,8 @@ func TestRegexp(t *testing.T) { } got := 
sres.Files[0].LineMatches[0] - want := LineMatch{ - LineFragments: []LineFragmentMatch{{ + want := zoekt.LineMatch{ + LineFragments: []zoekt.LineFragmentMatch{{ LineOffset: 3, Offset: 3, MatchLength: 11, @@ -1324,12 +1410,12 @@ func TestRegexp(t *testing.T) { } got := sres.Files[0].ChunkMatches[0] - want := ChunkMatch{ + want := zoekt.ChunkMatch{ Content: content, - ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, - Ranges: []Range{{ - Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4}, - End: Location{ByteOffset: 14, LineNumber: 1, Column: 15}, + ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, + Ranges: []zoekt.Range{{ + Start: zoekt.Location{ByteOffset: 3, LineNumber: 1, Column: 4}, + End: zoekt.Location{ByteOffset: 14, LineNumber: 1, Column: 15}, }}, } @@ -1343,7 +1429,7 @@ func TestRegexpFile(t *testing.T) { content := []byte("needle the bla") name := "let's play: find the mussel" - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: name, Content: content}, Document{Name: "play.txt", Content: content}) @@ -1384,7 +1470,7 @@ func TestRegexpOrder(t *testing.T) { content := []byte("bla the needle") // ----------------01234567890123 - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "f1", Content: content}) t.Run("LineMatches", func(t *testing.T) { @@ -1414,7 +1500,7 @@ func TestRepoName(t *testing.T) { content := []byte("bla the needle") // ----------------01234567890123 - b := testIndexBuilder(t, &Repository{Name: "bla"}, + b := testShardBuilder(t, &zoekt.Repository{Name: "bla"}, Document{Name: "f1", Content: content}) t.Run("LineMatches", func(t *testing.T) { @@ -1471,19 +1557,44 @@ func TestRepoName(t *testing.T) { } func TestMergeMatches(t *testing.T) { - content := []byte("blablabla") - b := testIndexBuilder(t, nil, - Document{Name: "f1", Content: content}) - - t.Run("LineMatches", func(t *testing.T) { + t.Run("LineMatches, adjacent matches", func(t *testing.T) 
{ + b := testShardBuilder(t, nil, + Document{Name: "f1", Content: []byte("blablabla")}) sres := searchForTest(t, b, &query.Substring{Pattern: "bla"}) + if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { t.Fatalf("got %v, want 1 match", sres.Files) } + + if len(sres.Files[0].LineMatches[0].LineFragments) != 3 { + t.Fatalf("got %v, want 3 fragments", sres.Files[0].LineMatches[0].LineFragments) + } }) - t.Run("ChunkMatches", func(t *testing.T) { + t.Run("LineMatches, overlapping matches", func(t *testing.T) { + b := testShardBuilder(t, nil, + Document{Name: "f1", Content: []byte("hellogoodbye")}) + sres := searchForTest(t, b, + &query.And{Children: []query.Q{ + &query.Substring{Pattern: "hello"}, + &query.Substring{Pattern: "logood"}, + }}) + + if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { + t.Fatalf("got %v, want 1 match", sres.Files) + } + + lineFragments := sres.Files[0].LineMatches[0].LineFragments + if len(lineFragments) != 1 || lineFragments[0].MatchLength != len("hello") { + t.Fatalf("got %v, want single line fragment 'hello'", lineFragments) + } + }) + + t.Run("ChunkMatches, no overlap", func(t *testing.T) { + b := testShardBuilder(t, nil, + Document{Name: "f1", Content: []byte("blablabla")}) + sres := searchForTest(t, b, &query.Substring{Pattern: "bla"}, chunkOpts, @@ -1491,12 +1602,35 @@ func TestMergeMatches(t *testing.T) { if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { t.Fatalf("got %v, want 1 match", sres.Files) } + + if len(sres.Files[0].ChunkMatches[0].Ranges) != 3 { + t.Fatalf("got %v, want 3 ranges", sres.Files[0].ChunkMatches[0].Ranges) + } + }) + + t.Run("ChunkMatches, overlapping matches", func(t *testing.T) { + b := testShardBuilder(t, nil, + Document{Name: "f1", Content: []byte("hellogoodbye")}) + sres := searchForTest(t, b, + &query.And{Children: []query.Q{ + &query.Substring{Pattern: "hello"}, + &query.Substring{Pattern: "logood"}, + }}, chunkOpts) + + if len(sres.Files) != 1 || 
len(sres.Files[0].ChunkMatches) != 1 { + t.Fatalf("got %v, want 1 chunk match", sres.Files) + } + + ranges := sres.Files[0].ChunkMatches[0].Ranges + if len(ranges) != 1 || ranges[0].Start.ByteOffset != 0 || ranges[0].End.ByteOffset != 5 { + t.Fatalf("got %v, want single chunk range 'hello'", ranges) + } }) } func TestRepoURL(t *testing.T) { content := []byte("blablabla") - b := testIndexBuilder(t, &Repository{ + b := testShardBuilder(t, &zoekt.Repository{ Name: "name", URL: "URL", CommitURLTemplate: "commit", @@ -1516,7 +1650,7 @@ func TestRepoURL(t *testing.T) { func TestRegexpCaseSensitive(t *testing.T) { content := []byte("bla\nfunc unmarshalGitiles\n") - b := testIndexBuilder(t, nil, Document{ + b := testShardBuilder(t, nil, Document{ Name: "f1", Content: content, }) @@ -1529,7 +1663,7 @@ func TestRegexpCaseSensitive(t *testing.T) { }) if len(res.Files) != 1 { - t.Fatalf("got %v, want one match", res.Files) + t.Fatalf("got %v, want one index", res.Files) } }) @@ -1543,7 +1677,7 @@ func TestRegexpCaseSensitive(t *testing.T) { ) if len(res.Files) != 1 { - t.Fatalf("got %v, want one match", res.Files) + t.Fatalf("got %v, want one index", res.Files) } }) } @@ -1551,7 +1685,7 @@ func TestRegexpCaseSensitive(t *testing.T) { func TestRegexpCaseFolding(t *testing.T) { content := []byte("bla\nfunc unmarshalGitiles\n") - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "f1", Content: content}) res := searchForTest(t, b, &query.Regexp{ @@ -1560,13 +1694,13 @@ func TestRegexpCaseFolding(t *testing.T) { }) if len(res.Files) != 1 { - t.Fatalf("got %v, want one match", res.Files) + t.Fatalf("got %v, want one index", res.Files) } } func TestCaseRegexp(t *testing.T) { content := []byte("BLABLABLA") - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "f1", Content: content}) t.Run("LineMatches", func(t *testing.T) { @@ -1598,7 +1732,7 @@ func TestCaseRegexp(t *testing.T) { func TestNegativeRegexp(t *testing.T) { content := 
[]byte("BLABLABLA needle bla") - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "f1", Content: content}) t.Run("LineMatches", func(t *testing.T) { @@ -1643,7 +1777,7 @@ func TestSymbolRank(t *testing.T) { content := []byte("func bla() blubxxxxx") // ----------------01234567890123456789 - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{ Name: "f1", Content: content, @@ -1667,7 +1801,7 @@ func TestSymbolRank(t *testing.T) { t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) } if res.Files[0].FileName != "f2" { - t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) + t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) } }) @@ -1682,7 +1816,7 @@ func TestSymbolRank(t *testing.T) { t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) } if res.Files[0].FileName != "f2" { - t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) + t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) } }) } @@ -1694,7 +1828,7 @@ func TestSymbolRankRegexpUTF8(t *testing.T) { content := []byte(prefix + "func bla() blub") // ------012345678901234 - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{ Name: "f1", Content: content, @@ -1717,7 +1851,7 @@ func TestSymbolRankRegexpUTF8(t *testing.T) { t.Fatalf("got %#v, want 3 files", res.Files) } if res.Files[0].FileName != "f2" { - t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) + t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) } }) @@ -1731,7 +1865,7 @@ func TestSymbolRankRegexpUTF8(t *testing.T) { t.Fatalf("got %#v, want 3 files", res.Files) } if res.Files[0].FileName != "f2" { - t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) + t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) } }) } @@ -1742,7 +1876,7 @@ func TestPartialSymbolRank(t *testing.T) { content := []byte("func bla() blub") // ----------------012345678901234 - b := testIndexBuilder(t, nil, + b := 
testShardBuilder(t, nil, Document{ Name: "f1", Content: content, @@ -1767,7 +1901,7 @@ func TestPartialSymbolRank(t *testing.T) { t.Fatalf("got %#v, want 3 files", res.Files) } if res.Files[0].FileName != "f2" { - t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) + t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) } }) @@ -1781,7 +1915,7 @@ func TestPartialSymbolRank(t *testing.T) { t.Fatalf("got %#v, want 3 files", res.Files) } if res.Files[0].FileName != "f2" { - t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) + t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) } }) } @@ -1789,7 +1923,7 @@ func TestPartialSymbolRank(t *testing.T) { func TestNegativeRepo(t *testing.T) { content := []byte("bla the needle") // ----------------01234567890123 - b := testIndexBuilder(t, &Repository{ + b := testShardBuilder(t, &zoekt.Repository{ Name: "bla", }, Document{Name: "f1", Content: content}) @@ -1823,11 +1957,11 @@ func TestListRepos(t *testing.T) { // ----------------012345678901234- t.Run("default and minimal fallback", func(t *testing.T) { - repo := &Repository{ + repo := &zoekt.Repository{ Name: "reponame", - Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, + Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, } - b := testIndexBuilder(t, repo, + b := testShardBuilder(t, repo, Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, Document{Name: "f2", Content: content, Branches: []string{"main"}}, Document{Name: "f2", Content: content, Branches: []string{"dev"}}, @@ -1835,11 +1969,11 @@ func TestListRepos(t *testing.T) { searcher := searcherForTest(t, b) - for _, opts := range []*ListOptions{ + for _, opts := range []*zoekt.ListOptions{ nil, {}, - {Field: RepoListFieldRepos}, - {Field: RepoListFieldReposMap}, + {Field: zoekt.RepoListFieldRepos}, + {Field: zoekt.RepoListFieldReposMap}, } { t.Run(fmt.Sprint(opts), func(t *testing.T) { q := &query.Repo{Regexp: regexp.MustCompile("epo")} @@ 
-1849,10 +1983,10 @@ func TestListRepos(t *testing.T) { t.Fatalf("List(%v): %v", q, err) } - want := &RepoList{ - Repos: []*RepoListEntry{{ + want := &zoekt.RepoList{ + Repos: []*zoekt.RepoListEntry{{ Repository: *repo, - Stats: RepoStats{ + Stats: zoekt.RepoStats{ Documents: 4, ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4 Shards: 1, @@ -1862,7 +1996,7 @@ func TestListRepos(t *testing.T) { OtherBranchesNewLinesCount: 3, }, }}, - Stats: RepoStats{ + Stats: zoekt.RepoStats{ Repos: 1, Documents: 4, ContentBytes: 68, @@ -1875,10 +2009,10 @@ func TestListRepos(t *testing.T) { } ignored := []cmp.Option{ cmpopts.EquateEmpty(), - cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), - cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), - cmpopts.IgnoreFields(Repository{}, "SubRepoMap"), - cmpopts.IgnoreFields(Repository{}, "priority"), + cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"), + cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"), + cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"), + cmpopts.IgnoreFields(zoekt.Repository{}, "priority"), } if diff := cmp.Diff(want, res, ignored...); diff != "" { t.Fatalf("mismatch (-want +got):\n%s", diff) @@ -1897,13 +2031,13 @@ func TestListRepos(t *testing.T) { }) t.Run("minimal", func(t *testing.T) { - repo := &Repository{ + repo := &zoekt.Repository{ ID: 1234, Name: "reponame", - Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, + Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, RawConfig: map[string]string{"repoid": "1234"}, } - b := testIndexBuilder(t, repo, + b := testShardBuilder(t, repo, Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, Document{Name: "f2", Content: content, Branches: []string{"main"}}, Document{Name: "f2", Content: content, Branches: []string{"dev"}}, @@ -1912,19 +2046,19 @@ func TestListRepos(t *testing.T) { searcher := searcherForTest(t, b) q := &query.Repo{Regexp: regexp.MustCompile("epo")} - res, err 
:= searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap}) + res, err := searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}) if err != nil { t.Fatalf("List(%v): %v", q, err) } - want := &RepoList{ - ReposMap: ReposMap{ + want := &zoekt.RepoList{ + ReposMap: zoekt.ReposMap{ repo.ID: { HasSymbols: repo.HasSymbols, Branches: repo.Branches, }, }, - Stats: RepoStats{ + Stats: zoekt.RepoStats{ Repos: 1, Shards: 1, Documents: 4, @@ -1937,14 +2071,14 @@ func TestListRepos(t *testing.T) { } ignored := []cmp.Option{ - cmpopts.IgnoreFields(MinimalRepoListEntry{}, "IndexTimeUnix"), + cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"), } if diff := cmp.Diff(want, res, ignored...); diff != "" { t.Fatalf("mismatch (-want +got):\n%s", diff) } q = &query.Repo{Regexp: regexp.MustCompile("bla")} - res, err = searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap}) + res, err = searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}) if err != nil { t.Fatalf("List(%v): %v", q, err) } @@ -1957,7 +2091,7 @@ func TestListRepos(t *testing.T) { func TestListReposByContent(t *testing.T) { content := []byte("bla the needle") - b := testIndexBuilder(t, &Repository{ + b := testShardBuilder(t, &zoekt.Repository{ Name: "reponame", }, Document{Name: "f1", Content: content}, @@ -1988,7 +2122,7 @@ func TestListReposByContent(t *testing.T) { func TestMetadata(t *testing.T) { content := []byte("bla the needle") - b := testIndexBuilder(t, &Repository{ + b := testShardBuilder(t, &zoekt.Repository{ Name: "reponame", }, Document{Name: "f1", Content: content}, Document{Name: "f2", Content: content}) @@ -2009,8 +2143,41 @@ func TestMetadata(t *testing.T) { } } +func TestRepoWithMetadata(t *testing.T) { + sb := newShardBuilder(0) + sb.repoList = []zoekt.Repository{ + { + Name: "repo1", + Metadata: map[string]string{"language": "go", "custom_key": "value"}, + 
}, + } + + var buf bytes.Buffer + if err := sb.Write(&buf); err != nil { + t.Fatalf("failed to write shard: %v", err) + } + + // Simulate reading the shard back + f := &memSeeker{buf.Bytes()} + repoMetaData, _, err := ReadMetadata(f) + if err != nil { + t.Fatalf("failed to read metadata: %v", err) + } + + // Verify the metadata + if len(repoMetaData) != 1 { + t.Fatalf("expected 1 repository, got %d", len(repoMetaData)) + } + if got, want := repoMetaData[0].Metadata["language"], "go"; got != want { + t.Errorf("expected metadata 'language' to be %q, got %q", want, got) + } + if got, want := repoMetaData[0].Metadata["custom_key"], "value"; got != want { + t.Errorf("expected metadata 'custom_key' to be %q, got %q", want, got) + } +} + func TestOr(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "f1", Content: []byte("needle")}, Document{Name: "f2", Content: []byte("banana")}) t.Run("LineMatches", func(t *testing.T) { @@ -2037,7 +2204,7 @@ func TestOr(t *testing.T) { func TestFrequency(t *testing.T) { content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot") - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{ Name: "f1", Content: content, @@ -2066,7 +2233,7 @@ func TestMatchNewline(t *testing.T) { content := []byte("pqr\nalex") - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{ Name: "f1", Content: content, @@ -2092,7 +2259,7 @@ func TestMatchNewline(t *testing.T) { } func TestSubRepo(t *testing.T) { - subRepos := map[string]*Repository{ + subRepos := map[string]*zoekt.Repository{ "sub": { Name: "sub-name", LineFragmentTemplate: "sub-line", @@ -2101,7 +2268,7 @@ func TestSubRepo(t *testing.T) { content := []byte("pqr\nalex") - b := testIndexBuilder(t, &Repository{ + b := testShardBuilder(t, &zoekt.Repository{ SubRepoMap: subRepos, }, Document{ Name: "sub/f1", @@ -2125,7 +2292,7 @@ func TestSubRepo(t *testing.T) { } func TestSearchEither(t *testing.T) { - b := 
testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "f1", Content: []byte("bla needle bla")}, Document{Name: "needle-file-branch", Content: []byte("bla content")}) @@ -2137,7 +2304,7 @@ func TestSearchEither(t *testing.T) { sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}) if len(sres.Files) != 1 { - t.Fatalf("got %v, wanted 1 match", sres.Files) + t.Fatalf("got %v, wanted 1 index", sres.Files) } if got, want := sres.Files[0].FileName, "f1"; got != want { @@ -2153,7 +2320,7 @@ func TestSearchEither(t *testing.T) { sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts) if len(sres.Files) != 1 { - t.Fatalf("got %v, wanted 1 match", sres.Files) + t.Fatalf("got %v, wanted 1 index", sres.Files) } if got, want := sres.Files[0].FileName, "f1"; got != want { @@ -2166,19 +2333,19 @@ func TestUnicodeExactMatch(t *testing.T) { needle := "néédlÉ" content := []byte("blá blá " + needle + " blâ") - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "f1", Content: content}) t.Run("LineMatches", func(t *testing.T) { if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 { - t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) + t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files) } }) t.Run("ChunkMatches", func(t *testing.T) { res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts) if len(res.Files) != 1 { - t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) + t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files) } }) } @@ -2187,17 +2354,17 @@ func TestUnicodeCoverContent(t *testing.T) { needle := "néédlÉ" content := []byte("blá blá " + needle + " blâ") - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "f1", Content: content}) t.Run("LineMatches", func(t *testing.T) { if res := searchForTest(t, b, 
&query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 { - t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) + t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files) } res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}) if len(res.Files) != 1 { - t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files) + t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files) } if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { @@ -2208,12 +2375,12 @@ func TestUnicodeCoverContent(t *testing.T) { t.Run("ChunkMatches", func(t *testing.T) { res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts) if len(res.Files) != 0 { - t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) + t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files) } res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts) if len(res.Files) != 1 { - t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files) + t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files) } got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset @@ -2228,13 +2395,13 @@ func TestUnicodeNonCoverContent(t *testing.T) { needle := "nééáádlÉ" content := []byte("blá blá " + needle + " blâ") - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "f1", Content: content}) t.Run("LineMatches", func(t *testing.T) { res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}) if len(res.Files) != 1 { - t.Fatalf("got %v, wanted 1 match", res.Files) + t.Fatalf("got %v, wanted 1 index", res.Files) } if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { @@ -2245,7 +2412,7 @@ func TestUnicodeNonCoverContent(t *testing.T) { t.Run("ChunkMatches", func(t *testing.T) { res := searchForTest(t, b, 
&query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts) if len(res.Files) != 1 { - t.Fatalf("got %v, wanted 1 match", res.Files) + t.Fatalf("got %v, wanted 1 index", res.Files) } got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset @@ -2268,22 +2435,22 @@ func TestUnicodeVariableLength(t *testing.T) { " ee" + string([]rune{upper}) + "ee") t.Run("LineMatches", func(t *testing.T) { - b := testIndexBuilder(t, nil, - Document{Name: "f1", Content: []byte(corpus)}) + b := testShardBuilder(t, nil, + Document{Name: "f1", Content: corpus}) res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}) if len(res.Files) != 1 { - t.Fatalf("got %v, wanted 1 match", res.Files) + t.Fatalf("got %v, wanted 1 index", res.Files) } }) t.Run("ChunkMatches", func(t *testing.T) { - b := testIndexBuilder(t, nil, - Document{Name: "f1", Content: []byte(corpus)}) + b := testShardBuilder(t, nil, + Document{Name: "f1", Content: corpus}) res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts) if len(res.Files) != 1 { - t.Fatalf("got %v, wanted 1 match", res.Files) + t.Fatalf("got %v, wanted 1 index", res.Files) } }) } @@ -2291,7 +2458,7 @@ func TestUnicodeVariableLength(t *testing.T) { func TestUnicodeFileStartOffsets(t *testing.T) { unicode := "世界" wat := "waaaaaat" - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{ Name: "f1", Content: []byte(unicode), @@ -2304,7 +2471,7 @@ func TestUnicodeFileStartOffsets(t *testing.T) { q := &query.Substring{Pattern: wat, Content: true} res := searchForTest(t, b, q) if len(res.Files) != 1 { - t.Fatalf("got %v, wanted 1 match", res.Files) + t.Fatalf("got %v, wanted 1 index", res.Files) } } @@ -2314,7 +2481,7 @@ func TestLongFileUTF8(t *testing.T) { // 6 bytes. 
unicode := "世界" content := []byte(strings.Repeat(unicode, 100) + needle) - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{ Name: "f1", Content: []byte(strings.Repeat("a", 50)), @@ -2343,7 +2510,7 @@ func TestLongFileUTF8(t *testing.T) { func TestEstimateDocCount(t *testing.T) { content := []byte("bla needle bla") - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f1", Content: content}, Document{Name: "f2", Content: content}, ) @@ -2353,7 +2520,7 @@ func TestEstimateDocCount(t *testing.T) { query.NewAnd( &query.Substring{Pattern: "needle"}, &query.Repo{Regexp: regexp.MustCompile("reponame")}, - ), SearchOptions{ + ), zoekt.SearchOptions{ EstimateDocCount: true, }); sres.Stats.ShardFilesConsidered != 2 { t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) @@ -2362,7 +2529,7 @@ func TestEstimateDocCount(t *testing.T) { query.NewAnd( &query.Substring{Pattern: "needle"}, &query.Repo{Regexp: regexp.MustCompile("nomatch")}, - ), SearchOptions{ + ), zoekt.SearchOptions{ EstimateDocCount: true, }); sres.Stats.ShardFilesConsidered != 0 { t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) @@ -2374,7 +2541,7 @@ func TestEstimateDocCount(t *testing.T) { query.NewAnd( &query.Substring{Pattern: "needle"}, &query.Repo{Regexp: regexp.MustCompile("reponame")}, - ), SearchOptions{ + ), zoekt.SearchOptions{ EstimateDocCount: true, ChunkMatches: true, }); sres.Stats.ShardFilesConsidered != 2 { @@ -2384,7 +2551,7 @@ func TestEstimateDocCount(t *testing.T) { query.NewAnd( &query.Substring{Pattern: "needle"}, &query.Repo{Regexp: regexp.MustCompile("nomatch")}, - ), SearchOptions{ + ), zoekt.SearchOptions{ EstimateDocCount: true, ChunkMatches: true, }); sres.Stats.ShardFilesConsidered != 0 { @@ -2398,7 +2565,7 @@ func TestUTF8CorrectCorpus(t *testing.T) { // 6 bytes. 
unicode := "世界" - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{ Name: "f1", Content: []byte(strings.Repeat(unicode, 100)), @@ -2426,7 +2593,7 @@ func TestUTF8CorrectCorpus(t *testing.T) { } func TestBuilderStats(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{ Name: "f1", Content: []byte(strings.Repeat("abcd", 1024)), @@ -2442,7 +2609,7 @@ func TestBuilderStats(t *testing.T) { } func TestIOStats(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{ Name: "f1", Content: []byte(strings.Repeat("abcd", 1024)), @@ -2453,7 +2620,7 @@ func TestIOStats(t *testing.T) { res := searchForTest(t, b, q) // 4096 (content) + 2 (overhead: newlines or doc sections) - if got, want := res.Stats.ContentBytesLoaded, int64(4100); got != want { + if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { t.Errorf("got content I/O %d, want %d", got, want) } @@ -2479,10 +2646,42 @@ func TestIOStats(t *testing.T) { t.Errorf("got index I/O %d, want %d", got, want) } }) + + t.Run("LineMatches with BM25", func(t *testing.T) { + q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} + res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true}) + + // 4096 (content) + 2 (overhead: newlines or doc sections) + if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { + t.Errorf("got content I/O %d, want %d", got, want) + } + + // 1024 entries, each 4 bytes apart. 4 fits into single byte + // delta encoded. 
+ if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { + t.Errorf("got index I/O %d, want %d", got, want) + } + }) + + t.Run("ChunkMatches with BM25", func(t *testing.T) { + q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} + res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true}) + + // 4096 (content) + 2 (overhead: newlines or doc sections) + if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { + t.Errorf("got content I/O %d, want %d", got, want) + } + + // 1024 entries, each 4 bytes apart. 4 fits into single byte + // delta encoded. + if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { + t.Errorf("got index I/O %d, want %d", got, want) + } + }) } func TestStartLineAnchor(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{ Name: "f1", Content: []byte( @@ -2543,9 +2742,9 @@ func TestAndOrUnicode(t *testing.T) { query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")}, query.NewOr(&query.Branch{Pattern: "master"})))) - b := testIndexBuilder(t, &Repository{ + b := testShardBuilder(t, &zoekt.Repository{ Name: "name", - Branches: []RepositoryBranch{{"master", "master-version"}}, + Branches: []zoekt.RepositoryBranch{{"master", "master-version"}}, }, Document{ Name: "f2", Content: []byte("orange\u2318apple"), @@ -2570,7 +2769,7 @@ func TestAndOrUnicode(t *testing.T) { func TestAndShort(t *testing.T) { content := []byte("bla needle at orange bla") - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f1", Content: content}, Document{Name: "f2", Content: []byte("xx at xx")}, Document{Name: "f3", Content: []byte("yy orange xx")}, @@ -2596,7 +2795,7 @@ func TestAndShort(t *testing.T) { func TestNoCollectRegexpSubstring(t *testing.T) { content := []byte("bla final bla\nfoo final, foo") - b := testIndexBuilder(t, 
&Repository{Name: "reponame"}, + b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f1", Content: content}, ) @@ -2610,7 +2809,7 @@ func TestNoCollectRegexpSubstring(t *testing.T) { t.Fatalf("got %v, want 1 result", res.Files) } if f := res.Files[0]; len(f.LineMatches) != 1 { - t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches)) + t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches)) } }) @@ -2620,12 +2819,12 @@ func TestNoCollectRegexpSubstring(t *testing.T) { t.Fatalf("got %v, want 1 result", res.Files) } if f := res.Files[0]; len(f.ChunkMatches) != 1 { - t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches)) + t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches)) } }) } -func printLineMatches(ms []LineMatch) string { +func printLineMatches(ms []zoekt.LineMatch) string { var ss []string for _, m := range ms { ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments)) @@ -2636,7 +2835,7 @@ func printLineMatches(ms []LineMatch) string { func TestLang(t *testing.T) { content := []byte("bla needle bla") - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f1", Content: content}, Document{Name: "f2", Language: "java", Content: content}, Document{Name: "f3", Language: "cpp", Content: content}, @@ -2670,7 +2869,7 @@ func TestLang(t *testing.T) { func TestLangShortcut(t *testing.T) { content := []byte("bla needle bla") - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f2", Language: "java", Content: content}, Document{Name: "f3", Language: "cpp", Content: content}, ) @@ -2684,7 +2883,7 @@ func TestLangShortcut(t *testing.T) { t.Fatalf("got %v, want 0 results", res.Files) } if res.Stats.IndexBytesLoaded > 0 { - t.Errorf("got 
IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) + t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) } }) @@ -2694,14 +2893,14 @@ func TestLangShortcut(t *testing.T) { t.Fatalf("got %v, want 0 results", res.Files) } if res.Stats.IndexBytesLoaded > 0 { - t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) + t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) } }) } func TestNoTextMatchAtoms(t *testing.T) { content := []byte("bla needle bla") - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f1", Content: content}, Document{Name: "f2", Language: "java", Content: content}, Document{Name: "f3", Language: "cpp", Content: content}, @@ -2724,7 +2923,7 @@ func TestNoTextMatchAtoms(t *testing.T) { func TestNoPositiveAtoms(t *testing.T) { content := []byte("bla needle bla") - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f1", Content: content}, Document{Name: "f2", Content: content}, ) @@ -2750,7 +2949,7 @@ func TestSymbolBoundaryStart(t *testing.T) { content := []byte("start\nbla bla\nend") // ----------------012345-67890123-456 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{ Name: "f1", Content: content, @@ -2787,7 +2986,7 @@ func TestSymbolBoundaryEnd(t *testing.T) { content := []byte("start\nbla bla\nend") // ----------------012345-67890123-456 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{ Name: "f1", Content: content, @@ -2824,7 +3023,7 @@ func TestSymbolSubstring(t *testing.T) { content := []byte("bla\nsymblabla\nbla") // ----------------0123-4567890123-456 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testShardBuilder(t, 
&zoekt.Repository{Name: "reponame"}, Document{ Name: "f1", Content: content, @@ -2861,7 +3060,7 @@ func TestSymbolSubstringExact(t *testing.T) { content := []byte("bla\nsym\nbla\nsym\nasymb") // ----------------0123-4567-890123456-78901 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{ Name: "f1", Content: content, @@ -2898,7 +3097,7 @@ func TestSymbolRegexpExact(t *testing.T) { content := []byte("blah\nbla\nbl") // ----------------01234-5678-90 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{ Name: "f1", Content: content, @@ -2935,7 +3134,7 @@ func TestSymbolRegexpPartial(t *testing.T) { content := []byte("abcdef") // ----------------012345 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{ Name: "f1", Content: content, @@ -2990,7 +3189,7 @@ func TestSymbolRegexpAll(t *testing.T) { }, } - b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...) + b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, docs...) q := &query.Symbol{ Expr: &query.Regexp{Regexp: mustParseRE(".*")}, } @@ -3025,7 +3224,7 @@ func TestSymbolRegexpAll(t *testing.T) { } for j, sec := range want.Symbols { - if sec.Start != uint32(got[j].Start.ByteOffset) { + if sec.Start != got[j].Start.ByteOffset { t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name) } } @@ -3035,11 +3234,11 @@ func TestSymbolRegexpAll(t *testing.T) { func TestHitIterTerminate(t *testing.T) { // contrived input: trigram frequencies forces selecting abc + - // def for the distance iteration. There is no match, so this + // def for the distance iteration. There is no index, so this // will advance the compressedPostingIterator to beyond the // end. 
content := []byte("abc bcdbcd cdecde abcabc def efg") - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{ Name: "f1", Content: content, @@ -3057,7 +3256,7 @@ func TestHitIterTerminate(t *testing.T) { func TestDistanceHitIterBailLast(t *testing.T) { content := []byte("AST AST AST UASH") - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{ Name: "f1", Content: content, @@ -3080,9 +3279,9 @@ func TestDistanceHitIterBailLast(t *testing.T) { func TestDocumentSectionRuneBoundary(t *testing.T) { content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint}) - b, err := NewIndexBuilder(nil) + b, err := NewShardBuilder(nil) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } for i, sec := range []DocumentSection{ @@ -3101,7 +3300,7 @@ func TestDocumentSectionRuneBoundary(t *testing.T) { func TestUnicodeQuery(t *testing.T) { content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint}) - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{ Name: "f1", Content: []byte(content), @@ -3159,9 +3358,9 @@ func TestSkipInvalidContent(t *testing.T) { "abc def \x00 abc", } { - b, err := NewIndexBuilder(nil) + b, err := NewShardBuilder(nil) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } if err := b.Add(Document{ @@ -3206,31 +3405,31 @@ func TestDocChecker(t *testing.T) { // Test valid and invalid text for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} { - if err := docChecker.Check([]byte(text), 20000, false); err != nil { - t.Errorf("Check(%q): %v", text, err) + if skip := docChecker.Check([]byte(text), 20000, false); skip != SkipReasonNone { + t.Errorf("Check(%q): %v", text, skip) } } for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} { - if err := docChecker.Check([]byte(text), 15, false); err == 
nil { + if skip := docChecker.Check([]byte(text), 15, false); skip == SkipReasonNone { t.Errorf("Check(%q) succeeded", text) } } // Test valid and invalid text with an allowed large file for _, text := range []string{"0123456789abcdefghi", "qwertyuiopasdfghjklzxcvbnm"} { - if err := docChecker.Check([]byte(text), 15, true); err != nil { - t.Errorf("Check(%q): %v", text, err) + if skip := docChecker.Check([]byte(text), 15, true); skip != SkipReasonNone { + t.Errorf("Check(%q): %v", text, skip) } } for _, text := range []string{"zero\x00byte", "xx"} { - if err := docChecker.Check([]byte(text), 15, true); err == nil { + if skip := docChecker.Check([]byte(text), 15, true); skip == SkipReasonNone { t.Errorf("Check(%q) succeeded", text) } } } func TestLineAnd(t *testing.T) { - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")}, Document{Name: "f2", Content: []byte("apple orange\nbanana")}, Document{Name: "f3", Content: []byte("banana grape")}, @@ -3266,7 +3465,7 @@ func TestLineAnd(t *testing.T) { } func TestLineAndFileName(t *testing.T) { - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f1", Content: []byte("apple banana\ngrape")}, Document{Name: "f2", Content: []byte("apple banana\norange")}, Document{Name: "apple banana", Content: []byte("banana grape")}, @@ -3302,7 +3501,7 @@ func TestLineAndFileName(t *testing.T) { } func TestMultiLineRegex(t *testing.T) { - b := testIndexBuilder(t, &Repository{Name: "reponame"}, + b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, Document{Name: "f1", Content: []byte("apple banana\ngrape")}, Document{Name: "f2", Content: []byte("apple orange")}, Document{Name: "f3", Content: []byte("grape apple")}, @@ -3346,7 +3545,7 @@ func TestMultiLineRegex(t *testing.T) { } 
func TestSearchTypeFileName(t *testing.T) { - b := testIndexBuilder(t, &Repository{ + b := testShardBuilder(t, &zoekt.Repository{ Name: "reponame", }, Document{Name: "f1", Content: []byte("bla the needle")}, @@ -3355,7 +3554,7 @@ func TestSearchTypeFileName(t *testing.T) { ) t.Run("LineMatches", func(t *testing.T) { - wantSingleMatch := func(res *SearchResult, want string) { + wantSingleMatch := func(res *zoekt.SearchResult, want string) { t.Helper() fmatches := res.Files if len(fmatches) != 1 { @@ -3397,7 +3596,7 @@ func TestSearchTypeFileName(t *testing.T) { }) t.Run("ChunkMatches", func(t *testing.T) { - wantSingleMatch := func(res *SearchResult, want string) { + wantSingleMatch := func(res *zoekt.SearchResult, want string) { t.Helper() fmatches := res.Files if len(fmatches) != 1 { @@ -3444,7 +3643,7 @@ func TestSearchTypeFileName(t *testing.T) { } func TestSearchTypeLanguage(t *testing.T) { - b := testIndexBuilder(t, &Repository{ + b := testShardBuilder(t, &zoekt.Repository{ Name: "reponame", }, Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")}, @@ -3456,7 +3655,7 @@ func TestSearchTypeLanguage(t *testing.T) { t.Log(b.languageMap) t.Run("LineMatches", func(t *testing.T) { - wantSingleMatch := func(res *SearchResult, want string) { + wantSingleMatch := func(res *zoekt.SearchResult, want string) { t.Helper() fmatches := res.Files if len(fmatches) != 1 { @@ -3503,7 +3702,7 @@ func TestSearchTypeLanguage(t *testing.T) { }) t.Run("ChunkMatches", func(t *testing.T) { - wantSingleMatch := func(res *SearchResult, want string) { + wantSingleMatch := func(res *zoekt.SearchResult, want string) { t.Helper() fmatches := res.Files if len(fmatches) != 1 { @@ -3551,23 +3750,23 @@ func TestSearchTypeLanguage(t *testing.T) { func TestStats(t *testing.T) { ignored := []cmp.Option{ cmpopts.EquateEmpty(), - cmpopts.IgnoreFields(RepoListEntry{}, "Repository"), - cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), - 
cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), + cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "Repository"), + cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"), + cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"), } - repoListEntries := func(b *IndexBuilder) []RepoListEntry { + repoListEntries := func(b *ShardBuilder) []zoekt.RepoListEntry { searcher := searcherForTest(t, b) indexdata := searcher.(*indexData) return indexdata.repoListEntry } t.Run("one empty repo", func(t *testing.T) { - b := testIndexBuilder(t, nil) + b := testShardBuilder(t, nil) got := repoListEntries(b) - want := []RepoListEntry{ + want := []zoekt.RepoListEntry{ { - Stats: RepoStats{ + Stats: zoekt.RepoStats{ Repos: 0, Shards: 1, Documents: 0, @@ -3586,14 +3785,14 @@ func TestStats(t *testing.T) { }) t.Run("one simple shard", func(t *testing.T) { - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{Name: "doc 0", Content: []byte("content 0")}, Document{Name: "doc 1", Content: []byte("content 1")}, ) got := repoListEntries(b) - want := []RepoListEntry{ + want := []zoekt.RepoListEntry{ { - Stats: RepoStats{ + Stats: zoekt.RepoStats{ Repos: 0, Shards: 1, Documents: 2, @@ -3612,8 +3811,8 @@ func TestStats(t *testing.T) { }) t.Run("one compound shard", func(t *testing.T) { - b := testIndexBuilderCompound(t, - []*Repository{ + b := testShardBuilderCompound(t, + []*zoekt.Repository{ {Name: "repo 0"}, {Name: "repo 1"}, }, @@ -3629,9 +3828,9 @@ func TestStats(t *testing.T) { }, ) got := repoListEntries(b) - want := []RepoListEntry{ + want := []zoekt.RepoListEntry{ { - Stats: RepoStats{ + Stats: zoekt.RepoStats{ Repos: 0, Shards: 1, Documents: 2, @@ -3643,7 +3842,7 @@ func TestStats(t *testing.T) { }, }, { - Stats: RepoStats{ + Stats: zoekt.RepoStats{ Repos: 0, Shards: 1, Documents: 2, @@ -3662,8 +3861,8 @@ func TestStats(t *testing.T) { }) t.Run("compound shard with empty repos", func(t *testing.T) { - b := testIndexBuilderCompound(t, - []*Repository{ + b := 
testShardBuilderCompound(t, + []*zoekt.Repository{ {Name: "repo 0"}, {Name: "repo 1"}, {Name: "repo 2"}, @@ -3680,18 +3879,18 @@ func TestStats(t *testing.T) { ) got := repoListEntries(b) - entryEmpty := RepoListEntry{Stats: RepoStats{ + entryEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{ Shards: 1, Documents: 0, ContentBytes: 0, }} - entryNonEmpty := RepoListEntry{Stats: RepoStats{ + entryNonEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{ Shards: 1, Documents: 1, ContentBytes: 14, }} - want := []RepoListEntry{ + want := []zoekt.RepoListEntry{ entryNonEmpty, entryEmpty, entryNonEmpty, @@ -3710,7 +3909,7 @@ func TestWordSearch(t *testing.T) { content := []byte("needle the bla") // ----------------01234567890123 - b := testIndexBuilder(t, nil, + b := testShardBuilder(t, nil, Document{ Name: "f1", Content: content, @@ -3733,8 +3932,8 @@ func TestWordSearch(t *testing.T) { } got := sres.Files[0].LineMatches[0] - want := LineMatch{ - LineFragments: []LineFragmentMatch{{ + want := zoekt.LineMatch{ + LineFragments: []zoekt.LineFragmentMatch{{ LineOffset: 7, Offset: 7, MatchLength: 3, @@ -3767,12 +3966,12 @@ func TestWordSearch(t *testing.T) { } got := sres.Files[0].ChunkMatches[0] - want := ChunkMatch{ + want := zoekt.ChunkMatch{ Content: content, - ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, - Ranges: []Range{{ - Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8}, - End: Location{ByteOffset: 10, LineNumber: 1, Column: 11}, + ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, + Ranges: []zoekt.Range{{ + Start: zoekt.Location{ByteOffset: 7, LineNumber: 1, Column: 8}, + End: zoekt.Location{ByteOffset: 10, LineNumber: 1, Column: 11}, }}, } @@ -3781,3 +3980,40 @@ func TestWordSearch(t *testing.T) { } }) } + +// Simple benchmark focused on chunk match scoring. It creates a single file that will have a 1000-line chunk match. 
+// The benchmark time is expected to be strongly correlated with time spent assembling and scoring this chunk. +func BenchmarkScoreChunkMatches(b *testing.B) { + ctx := context.Background() + var builder strings.Builder + for i := range 1000 { + builder.WriteString(fmt.Sprintf("line-%d one one one two two two three three three four four four five five\n", i)) + } + + searcher := searcherForTest(b, testShardBuilder(b, nil, + Document{Name: "f1", Content: []byte(builder.String())}, + )) + + q := &query.Or{ + Children: []query.Q{ + &query.Substring{Pattern: "f"}, + &query.Substring{Pattern: "t"}, + }} + + b.Run("score large ChunkMatch", func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{ChunkMatches: true, NumContextLines: 1}) + if err != nil { + b.Fatal(err) + } + + matches := sres.Files + if len(matches) == 0 { + b.Fatalf("want file index, got none") + } + } + }) +} diff --git a/indexdata.go b/index/indexdata.go similarity index 93% rename from indexdata.go rename to index/indexdata.go index 3e8095d3b..04359f408 100644 --- a/indexdata.go +++ b/index/indexdata.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "encoding/binary" @@ -20,10 +20,12 @@ import ( "fmt" "hash/crc64" "log" + "math" "math/bits" "slices" "unicode/utf8" + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/query" ) @@ -76,8 +78,8 @@ type indexData struct { // name => mask (power of 2) branchIDs []map[string]uint - metaData IndexMetadata - repoMetaData []Repository + metaData zoekt.IndexMetadata + repoMetaData []zoekt.Repository subRepos []uint32 subRepoPaths [][]string @@ -91,13 +93,19 @@ type indexData struct { // inverse of LanguageMap in metaData languageMap map[uint16]string - repoListEntry []RepoListEntry + // file categories for all the files. 
+ categories []byte + + repoListEntry []zoekt.RepoListEntry // repository indexes for all the files repos []uint16 // rawConfigMasks contains the encoded RawConfig for each repository rawConfigMasks []uint8 + + // Cache for docMatchTree objects + docMatchTreeCache *docMatchTreeCache } type symbolData struct { @@ -139,7 +147,7 @@ func (d *symbolData) kind(i uint32) []byte { } // data returns the symbol at index i -func (d *symbolData) data(i uint32) *Symbol { +func (d *symbolData) data(i uint32) *zoekt.Symbol { size := uint32(4 * 4) // 4 uint32s offset := i * size if offset >= uint32(len(d.symMetaData)) { @@ -147,7 +155,7 @@ func (d *symbolData) data(i uint32) *Symbol { } metadata := d.symMetaData[offset : offset+size] - sym := &Symbol{} + sym := &zoekt.Symbol{} key := uint32SliceAt(metadata, 1) sym.Kind = string(d.kind(key)) key = uint32SliceAt(metadata, 2) @@ -171,11 +179,23 @@ func (d *indexData) getLanguage(idx uint32) uint16 { return uint16(d.languages[idx*2]) | uint16(d.languages[idx*2+1])<<8 } +func (d *indexData) getCategory(idx uint32) FileCategory { + if len(d.categories) == 0 { + // This means we're reading an older index, so return 'missing' + return FileCategoryMissing + } + category, err := decodeCategory(d.categories[idx]) + if err != nil { + return FileCategoryMissing + } + return category +} + // calculates stats for files in the range [start, end). -func (d *indexData) calculateStatsForFileRange(start, end uint32) RepoStats { +func (d *indexData) calculateStatsForFileRange(start, end uint32) zoekt.RepoStats { if start >= end { // An empty shard for an empty repository. - return RepoStats{ + return zoekt.RepoStats{ Shards: 1, } } @@ -190,7 +210,7 @@ func (d *indexData) calculateStatsForFileRange(start, end uint32) RepoStats { // here). Right now I don't like that these numbers are not true, especially // after aggregation. For now I will move forward with this until we can // chat more. 
- return RepoStats{ + return zoekt.RepoStats{ ContentBytes: int64(bytesContent) + int64(bytesFN), Documents: int(end - start), // CR keegan for stefan: our shard count is going to go out of whack, @@ -206,7 +226,7 @@ func (d *indexData) calculateStatsForFileRange(start, end uint32) RepoStats { } func (d *indexData) calculateStats() error { - d.repoListEntry = make([]RepoListEntry, 0, len(d.repoMetaData)) + d.repoListEntry = make([]zoekt.RepoListEntry, 0, len(d.repoMetaData)) var start, end uint32 for repoID, md := range d.repoMetaData { @@ -218,7 +238,7 @@ func (d *indexData) calculateStats() error { return fmt.Errorf("shard documents out of order with respect to repositories: expected document %d to be part of repo %d", start, repoID) } - d.repoListEntry = append(d.repoListEntry, RepoListEntry{ + d.repoListEntry = append(d.repoListEntry, zoekt.RepoListEntry{ Repository: md, IndexMetadata: d.metaData, Stats: d.calculateStatsForFileRange(start, end), @@ -338,12 +358,10 @@ func findSelectiveNgrams(ngramOffs []runeNgramOff, indexMap []int, frequencies [ return } -const maxUInt32 = 0xffffffff - func minFrequencyNgramOffsets(ngramOffs []runeNgramOff, frequencies []uint32) (first, last runeNgramOff) { // Find the two lowest frequency ngrams. 
idx0, idx1 := 0, 0 - min0, min1 := uint32(maxUInt32), uint32(maxUInt32) + min0, min1 := uint32(math.MaxUint32), uint32(math.MaxUint32) for i, x := range frequencies { if x <= min0 { idx0, idx1 = i, idx0 @@ -429,7 +447,7 @@ func (d *indexData) iterateNgrams(query *query.Substring) (*ngramIterationResult return &ngramIterationResults{ matchIterator: &noMatchTree{ Why: "freq=0", - Stats: Stats{ + Stats: zoekt.Stats{ NgramLookups: ngramLookups, }, }, diff --git a/indexdata_test.go b/index/indexdata_test.go similarity index 99% rename from indexdata_test.go rename to index/indexdata_test.go index f5bd677c8..82044f0e7 100644 --- a/indexdata_test.go +++ b/index/indexdata_test.go @@ -1,4 +1,4 @@ -package zoekt +package index import ( "math/rand" diff --git a/indexfile_unix.go b/index/indexfile.go similarity index 95% rename from indexfile_unix.go rename to index/indexfile.go index fea4fae76..72e37101d 100644 --- a/indexfile_unix.go +++ b/index/indexfile.go @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build linux || darwin +//go:build linux || darwin || freebsd -package zoekt +package index import ( "fmt" "log" + "math" "os" "golang.org/x/sys/unix" @@ -62,7 +63,7 @@ func NewIndexFile(f *os.File) (IndexFile, error) { } sz := fi.Size() - if sz >= maxUInt32 { + if sz >= math.MaxUint32 { return nil, fmt.Errorf("file %s too large: %d", f.Name(), sz) } r := &mmapedIndexFile{ diff --git a/limit.go b/index/limit.go similarity index 81% rename from limit.go rename to index/limit.go index 1f67fc3fd..9de2b1eab 100644 --- a/limit.go +++ b/index/limit.go @@ -1,11 +1,15 @@ -package zoekt +package index -import "log" +import ( + "log" + + "github.com/sourcegraph/zoekt" +) // SortAndTruncateFiles is a convenience around SortFiles and // DisplayTruncator. Given an aggregated files it will sort and then truncate // based on the search options. 
-func SortAndTruncateFiles(files []FileMatch, opts *SearchOptions) []FileMatch { +func SortAndTruncateFiles(files []zoekt.FileMatch, opts *zoekt.SearchOptions) []zoekt.FileMatch { SortFiles(files) truncator, _ := NewDisplayTruncator(opts) files, _ = truncator(files) @@ -16,12 +20,12 @@ func SortAndTruncateFiles(files []FileMatch, opts *SearchOptions) []FileMatch { // display limits by truncating and mutating before. hasMore is true until the // limits are exhausted. Once hasMore is false each subsequent call will // return an empty after and hasMore false. -type DisplayTruncator func(before []FileMatch) (after []FileMatch, hasMore bool) +type DisplayTruncator func(before []zoekt.FileMatch) (after []zoekt.FileMatch, hasMore bool) // NewDisplayTruncator will return a DisplayTruncator which enforces the limits in // opts. If there are no limits to enforce, hasLimits is false and there is no // need to call DisplayTruncator. -func NewDisplayTruncator(opts *SearchOptions) (_ DisplayTruncator, hasLimits bool) { +func NewDisplayTruncator(opts *zoekt.SearchOptions) (_ DisplayTruncator, hasLimits bool) { docLimit := opts.MaxDocDisplayCount docLimited := docLimit > 0 @@ -31,12 +35,12 @@ func NewDisplayTruncator(opts *SearchOptions) (_ DisplayTruncator, hasLimits boo done := false if !docLimited && !matchLimited { - return func(fm []FileMatch) ([]FileMatch, bool) { + return func(fm []zoekt.FileMatch) ([]zoekt.FileMatch, bool) { return fm, true }, false } - return func(fm []FileMatch) ([]FileMatch, bool) { + return func(fm []zoekt.FileMatch) ([]zoekt.FileMatch, bool) { if done { return nil, false } @@ -60,8 +64,8 @@ func NewDisplayTruncator(opts *SearchOptions) (_ DisplayTruncator, hasLimits boo }, true } -func limitMatches(files []FileMatch, limit int, chunkMatches bool) ([]FileMatch, int) { - var limiter func(file *FileMatch, limit int) int +func limitMatches(files []zoekt.FileMatch, limit int, chunkMatches bool) ([]zoekt.FileMatch, int) { + var limiter func(file 
*zoekt.FileMatch, limit int) int if chunkMatches { limiter = limitChunkMatches } else { @@ -78,7 +82,7 @@ func limitMatches(files []FileMatch, limit int, chunkMatches bool) ([]FileMatch, // Limit the number of ChunkMatches in the given FileMatch, returning the // remaining limit, if any. -func limitChunkMatches(file *FileMatch, limit int) int { +func limitChunkMatches(file *zoekt.FileMatch, limit int) int { for i := range file.ChunkMatches { cm := &file.ChunkMatches[i] if len(cm.Ranges) > limit { @@ -127,7 +131,7 @@ func limitChunkMatches(file *FileMatch, limit int) int { // Limit the number of LineMatches in the given FileMatch, returning the // remaining limit, if any. -func limitLineMatches(file *FileMatch, limit int) int { +func limitLineMatches(file *zoekt.FileMatch, limit int) int { for i := range file.LineMatches { lm := &file.LineMatches[i] if len(lm.LineFragments) > limit { diff --git a/limit_test.go b/index/limit_test.go similarity index 80% rename from limit_test.go rename to index/limit_test.go index b6df2c492..0e9dff270 100644 --- a/limit_test.go +++ b/index/limit_test.go @@ -1,4 +1,4 @@ -package zoekt +package index import ( "bytes" @@ -6,6 +6,8 @@ import ( "testing" "github.com/google/go-cmp/cmp" + + "github.com/sourcegraph/zoekt" ) func TestLimitMatches(t *testing.T) { @@ -62,8 +64,8 @@ func TestLimitMatches(t *testing.T) { for _, tc := range cases { t.Run("ChunkMatches", func(t *testing.T) { // Generate a ChunkMatch suitable for testing `LimitChunkMatches`. - generateChunkMatch := func(numRanges, lineNumber int) (ChunkMatch, int) { - cm := ChunkMatch{SymbolInfo: make([]*Symbol, numRanges)} + generateChunkMatch := func(numRanges, lineNumber int) (zoekt.ChunkMatch, int) { + cm := zoekt.ChunkMatch{SymbolInfo: make([]*zoekt.Symbol, numRanges)} // To simplify testing, we generate Content and the associated // Ranges with fixed logic: each ChunkMatch has 1 line of @@ -74,13 +76,13 @@ func TestLimitMatches(t *testing.T) { // 1 line of context. 
cm.Content = append(cm.Content, []byte("context\n")...) for i := 0; i < numRanges; i += 1 { - cm.Ranges = append(cm.Ranges, Range{ + cm.Ranges = append(cm.Ranges, zoekt.Range{ // We only provide LineNumber as that's all that's // relevant. - Start: Location{LineNumber: uint32(lineNumber + (2 * i) + 1)}, - End: Location{LineNumber: uint32(lineNumber + (2 * i) + 2)}, + Start: zoekt.Location{LineNumber: uint32(lineNumber + (2 * i) + 1)}, + End: zoekt.Location{LineNumber: uint32(lineNumber + (2 * i) + 2)}, }) - cm.Content = append(cm.Content, []byte(fmt.Sprintf("range%dStart\nrange%dEnd\n", i, i))...) + cm.Content = append(cm.Content, fmt.Appendf(nil, "range%dStart\nrange%dEnd\n", i, i)...) } // 1 line of context. Content in zoekt notably just does not // contain a trailing newline. @@ -91,19 +93,19 @@ func TestLimitMatches(t *testing.T) { return cm, lineNumber + (2 * numRanges) + 4 } - res := SearchResult{} + res := zoekt.SearchResult{} for _, file := range tc.in { - fm := FileMatch{} + fm := zoekt.FileMatch{} lineNumber := 0 for _, numRanges := range file { - var cm ChunkMatch + var cm zoekt.ChunkMatch cm, lineNumber = generateChunkMatch(numRanges, lineNumber) fm.ChunkMatches = append(fm.ChunkMatches, cm) } res.Files = append(res.Files, fm) } - res.Files = SortAndTruncateFiles(res.Files, &SearchOptions{ + res.Files = SortAndTruncateFiles(res.Files, &zoekt.SearchOptions{ MaxMatchDisplayCount: tc.limit, ChunkMatches: true, }) @@ -133,16 +135,16 @@ func TestLimitMatches(t *testing.T) { }) t.Run("LineMatches", func(t *testing.T) { - res := SearchResult{} + res := zoekt.SearchResult{} for _, file := range tc.in { - fm := FileMatch{} + fm := zoekt.FileMatch{} for _, numFragments := range file { - fm.LineMatches = append(fm.LineMatches, LineMatch{LineFragments: make([]LineFragmentMatch, numFragments)}) + fm.LineMatches = append(fm.LineMatches, zoekt.LineMatch{LineFragments: make([]zoekt.LineFragmentMatch, numFragments)}) } res.Files = append(res.Files, fm) } - res.Files = 
SortAndTruncateFiles(res.Files, &SearchOptions{ + res.Files = SortAndTruncateFiles(res.Files, &zoekt.SearchOptions{ MaxMatchDisplayCount: tc.limit, ChunkMatches: false, }) diff --git a/matchiter.go b/index/matchiter.go similarity index 93% rename from matchiter.go rename to index/matchiter.go index 98bf6b1ca..df75410b5 100644 --- a/matchiter.go +++ b/index/matchiter.go @@ -12,11 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "bytes" "fmt" + "math" + + "github.com/sourcegraph/zoekt" ) // candidateMatch is a candidate match for a substring. @@ -73,7 +76,7 @@ type matchIterator interface { // updateStats is called twice. After matchtree construction and after // searching is done. Implementations must take care to not report // statistics twice. - updateStats(*Stats) + updateStats(*zoekt.Stats) } // noMatchTree is both matchIterator and matchTree that matches nothing. @@ -81,7 +84,7 @@ type noMatchTree struct { Why string // Stats captures the work done to create the noMatchTree. 
- Stats Stats + Stats zoekt.Stats } func (t *noMatchTree) String() string { @@ -93,7 +96,7 @@ func (t *noMatchTree) candidates() []*candidateMatch { } func (t *noMatchTree) nextDoc() uint32 { - return maxUInt32 + return math.MaxUint32 } func (t *noMatchTree) prepare(uint32) {} @@ -102,9 +105,9 @@ func (t *noMatchTree) matches(cp *contentProvider, cost int, known map[matchTree return matchesNone } -func (t *noMatchTree) updateStats(s *Stats) { +func (t *noMatchTree) updateStats(s *zoekt.Stats) { s.Add(t.Stats) - t.Stats = Stats{} + t.Stats = zoekt.Stats{} } func (m *candidateMatch) String() string { @@ -146,7 +149,7 @@ func nextFileIndex(offset, f uint32, ends []uint32) uint32 { func (i *ngramDocIterator) nextDoc() uint32 { i.fileIdx = nextFileIndex(i.iter.first(), i.fileIdx, i.ends) if i.fileIdx >= uint32(len(i.ends)) { - return maxUInt32 + return math.MaxUint32 } return i.fileIdx } @@ -166,7 +169,7 @@ func (i *ngramDocIterator) prepare(nextDoc uint32) { i.fileIdx = nextDoc } -func (i *ngramDocIterator) updateStats(s *Stats) { +func (i *ngramDocIterator) updateStats(s *zoekt.Stats) { i.iter.updateStats(s) s.NgramMatches += i.matchCount s.NgramLookups += i.ngramLookups @@ -188,7 +191,7 @@ func (i *ngramDocIterator) candidates() []*candidateMatch { var candidates []*candidateMatch for { p1 := i.iter.first() - if p1 == maxUInt32 || p1 >= i.ends[i.fileIdx] { + if p1 == math.MaxUint32 || p1 >= i.ends[i.fileIdx] { break } i.iter.next(p1) diff --git a/index/matchiter_test.go b/index/matchiter_test.go new file mode 100644 index 000000000..5de9075e8 --- /dev/null +++ b/index/matchiter_test.go @@ -0,0 +1,40 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package index + +import ( + "reflect" + "testing" +) + +func TestMatchSize(t *testing.T) { + cases := []struct { + v any + size int + }{{ + v: candidateMatch{}, + size: 80, + }, { + v: candidateChunk{}, + size: 40, + }} + for _, c := range cases { + got := reflect.TypeOf(c.v).Size() + if int(got) != c.size { + t.Errorf(`sizeof struct %T has changed from %d to %d. +These are match structs that occur a lot in memory, so we optimize size. +When changing, please ensure there isn't unnecessary padding via the +tool fieldalignment then update this test.`, c.v, c.size, got) + } + } +} diff --git a/matchtree.go b/index/matchtree.go similarity index 93% rename from matchtree.go rename to index/matchtree.go index ce30f0980..6b0b91bf9 100644 --- a/matchtree.go +++ b/index/matchtree.go @@ -12,18 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "bytes" "fmt" "log" + "math" "regexp/syntax" "strings" "unicode/utf8" + "github.com/cespare/xxhash/v2" "github.com/grafana/regexp" + "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/internal/hybridre2" "github.com/sourcegraph/zoekt/internal/syntaxutil" "github.com/sourcegraph/zoekt/query" ) @@ -184,6 +188,11 @@ type noVisitMatchTree struct { type regexpMatchTree struct { regexp *regexp.Regexp + // hybridRegexp is a size-aware wrapper that dispatches to go-re2 for + // large file content when ZOEKT_RE2_THRESHOLD_BYTES is configured. + // For small inputs and filename matches, regexp is used directly. 
+ hybridRegexp *hybridre2.Regexp + // origRegexp is the original parsed regexp from the query structure. It // does not include mutations such as case sensitivity. origRegexp *syntax.Regexp @@ -204,10 +213,19 @@ func newRegexpMatchTree(s *query.Regexp) *regexpMatchTree { prefix = "(?i)" } + pattern := prefix + syntaxutil.RegexpString(s.Regexp) + + // hybridRegexp is only used for file content matching; skip the RE2 + // compilation overhead for filename-only regexps. + var hr *hybridre2.Regexp + if !s.FileName { + hr = hybridre2.MustCompile(pattern) + } return ®expMatchTree{ - regexp: regexp.MustCompile(prefix + syntaxutil.RegexpString(s.Regexp)), - origRegexp: s.Regexp, - fileName: s.FileName, + regexp: regexp.MustCompile(pattern), + hybridRegexp: hr, + origRegexp: s.Regexp, + fileName: s.FileName, } } @@ -442,7 +460,7 @@ func (t *docMatchTree) nextDoc() uint32 { return i } } - return maxUInt32 + return math.MaxUint32 } func (t *bruteForceMatchTree) nextDoc() uint32 { @@ -464,7 +482,7 @@ func (t *andMatchTree) nextDoc() uint32 { } func (t *orMatchTree) nextDoc() uint32 { - min := uint32(maxUInt32) + min := uint32(math.MaxUint32) for _, c := range t.children { m := c.nextDoc() if m < min { @@ -497,7 +515,7 @@ func (t *branchQueryMatchTree) nextDoc() uint32 { return i } } - return maxUInt32 + return math.MaxUint32 } // all String methods @@ -604,9 +622,9 @@ func visitMatchTree(t matchTree, f func(matchTree)) { // updateMatchTreeStats calls updateStats on all atoms in mt which have that // function defined. -func updateMatchTreeStats(mt matchTree, stats *Stats) { +func updateMatchTreeStats(mt matchTree, stats *zoekt.Stats) { visitMatchTree(mt, func(mt matchTree) { - if atom, ok := mt.(interface{ updateStats(*Stats) }); ok { + if atom, ok := mt.(interface{ updateStats(*zoekt.Stats) }); ok { atom.updateStats(stats) } }) @@ -672,7 +690,7 @@ func (t *andLineMatchTree) matches(cp *contentProvider, cost int, known map[matc // can return MatchesFound. 
// find child with fewest candidates - min := maxUInt32 + minCount := math.MaxInt fewestChildren := 0 for ix, child := range t.children { v, ok := child.(*substrMatchTree) @@ -681,8 +699,8 @@ func (t *andLineMatchTree) matches(cp *contentProvider, cost int, known map[matc if !ok || v.fileName { return matchesFound } - if len(v.current) < min { - min = len(v.current) + if len(v.current) < minCount { + minCount = len(v.current) fewestChildren = ix } } @@ -799,7 +817,17 @@ func (t *regexpMatchTree) matches(cp *contentProvider, cost int, known map[match } cp.stats.RegexpsConsidered++ - idxs := t.regexp.FindAllIndex(cp.data(t.fileName), -1) + data := cp.data(t.fileName) + // For file content, use hybridRegexp which dispatches to go-re2 when + // len(data) >= ZOEKT_RE2_THRESHOLD_BYTES. For filename matching, use + // grafana/regexp directly: filenames are always short, so the WASM + // call overhead of go-re2 outweighs any benefit. + var idxs [][]int + if t.fileName { + idxs = t.regexp.FindAllIndex(data, -1) + } else { + idxs = t.hybridRegexp.FindAllIndex(data, -1) + } found := t.found[:0] for _, idx := range idxs { cm := &candidateMatch{ @@ -969,6 +997,7 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error) if q == nil { return nil, fmt.Errorf("got nil (sub)query") } + switch s := q.(type) { case *query.Regexp: // RegexpToMatchTreeRecursive tries to distill a matchTree that matches a @@ -1051,13 +1080,43 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error) boost: s.Boost, }, nil + case *query.Meta: + checksum := queryMetaChecksum(s.Field, s.Value) + cacheKeyField := "Meta" + if cached, ok := d.docMatchTreeCache.Get(cacheKeyField, checksum); ok { + return cached, nil + } + + reposWant := make([]bool, len(d.repoMetaData)) + for repoIdx, r := range d.repoMetaData { + if r.Metadata != nil { + if val, ok := r.Metadata[s.Field]; ok && s.Value.MatchString(val) { + reposWant[repoIdx] = true + } + } + } + + mt := 
&docMatchTree{ + reason: "Meta", + numDocs: d.numDocs(), + predicate: func(docID uint32) bool { + repoIdx := d.repos[docID] + if int(repoIdx) >= len(reposWant) { + return false + } + return reposWant[repoIdx] + }, + } + d.docMatchTreeCache.Add(cacheKeyField, checksum, mt) + return mt, nil + case *query.Substring: return d.newSubstringMatchTree(s) case *query.Branch: masks := make([]uint64, 0, len(d.repoMetaData)) if s.Pattern == "HEAD" { - for i := 0; i < len(d.repoMetaData); i++ { + for range d.repoMetaData { masks = append(masks, 1) } } else { @@ -1411,3 +1470,11 @@ func isRegexpAll(r *syntax.Regexp) bool { return false } } + +func queryMetaChecksum(field string, value *regexp.Regexp) string { + h := xxhash.New() + h.Write([]byte(field)) + h.Write([]byte{':'}) + h.Write([]byte(value.String())) + return fmt.Sprintf("%x", h.Sum64()) +} diff --git a/matchtree_test.go b/index/matchtree_test.go similarity index 78% rename from matchtree_test.go rename to index/matchtree_test.go index 1fece0f5b..471bf0c8c 100644 --- a/matchtree_test.go +++ b/index/matchtree_test.go @@ -12,9 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package zoekt +package index import ( + "math" "reflect" "regexp/syntax" "testing" @@ -22,6 +23,7 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/grafana/regexp" + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/query" ) @@ -287,7 +289,7 @@ func TestSymbolMatchTree(t *testing.T) { func TestRepoSet(t *testing.T) { d := &indexData{ - repoMetaData: []Repository{{Name: "r0"}, {Name: "r1"}, {Name: "r2"}, {Name: "r3"}}, + repoMetaData: []zoekt.Repository{{Name: "r0"}, {Name: "r1"}, {Name: "r2"}, {Name: "r3"}}, fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1}, repos: []uint16{0, 0, 1, 2, 3, 3}, } @@ -296,21 +298,21 @@ func TestRepoSet(t *testing.T) { t.Fatal(err) } want := []uint32{2, 4, 5} - for i := 0; i < len(want); i++ { + for i := range want { nextDoc := mt.nextDoc() if nextDoc != want[i] { t.Fatalf("want %d, got %d", want[i], nextDoc) } mt.prepare(nextDoc) } - if mt.nextDoc() != maxUInt32 { + if mt.nextDoc() != math.MaxUint32 { t.Fatalf("expected %d document, but got at least 1 more", len(want)) } } func TestRepo(t *testing.T) { d := &indexData{ - repoMetaData: []Repository{{Name: "foo"}, {Name: "bar"}}, + repoMetaData: []zoekt.Repository{{Name: "foo"}, {Name: "bar"}}, fileBranchMasks: []uint64{1, 1, 1, 1, 1}, repos: []uint16{0, 0, 1, 0, 1}, } @@ -319,21 +321,21 @@ func TestRepo(t *testing.T) { t.Fatal(err) } want := []uint32{2, 4} - for i := 0; i < len(want); i++ { + for i := range want { nextDoc := mt.nextDoc() if nextDoc != want[i] { t.Fatalf("want %d, got %d", want[i], nextDoc) } mt.prepare(nextDoc) } - if mt.nextDoc() != maxUInt32 { + if mt.nextDoc() != math.MaxUint32 { t.Fatalf("expect %d documents, but got at least 1 more", len(want)) } } func TestBranchesRepos(t *testing.T) { d := &indexData{ - repoMetaData: []Repository{ + repoMetaData: []zoekt.Repository{ {ID: hash("foo"), Name: "foo"}, {ID: hash("bar"), Name: "bar"}, }, @@ -351,7 +353,7 @@ func TestBranchesRepos(t *testing.T) { } want := []uint32{3, 5} - for i := 0; i < len(want); i++ 
{ + for i := range want { nextDoc := mt.nextDoc() if nextDoc != want[i] { t.Fatalf("want %d, got %d", want[i], nextDoc) @@ -359,14 +361,14 @@ func TestBranchesRepos(t *testing.T) { mt.prepare(nextDoc) } - if mt.nextDoc() != maxUInt32 { + if mt.nextDoc() != math.MaxUint32 { t.Fatalf("expect %d documents, but got at least 1 more", len(want)) } } func TestRepoIDs(t *testing.T) { d := &indexData{ - repoMetaData: []Repository{{Name: "r0", ID: 0}, {Name: "r1", ID: 1}, {Name: "r2", ID: 2}, {Name: "r3", ID: 3}}, + repoMetaData: []zoekt.Repository{{Name: "r0", ID: 0}, {Name: "r1", ID: 1}, {Name: "r2", ID: 2}, {Name: "r3", ID: 3}}, fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1}, repos: []uint16{0, 0, 1, 2, 3, 3}, } @@ -374,15 +376,16 @@ func TestRepoIDs(t *testing.T) { if err != nil { t.Fatal(err) } + want := []uint32{2, 4, 5} - for i := 0; i < len(want); i++ { + for i := range want { nextDoc := mt.nextDoc() if nextDoc != want[i] { t.Fatalf("want %d, got %d", want[i], nextDoc) } mt.prepare(nextDoc) } - if mt.nextDoc() != maxUInt32 { + if mt.nextDoc() != math.MaxUint32 { t.Fatalf("expected %d document, but got at least 1 more", len(want)) } } @@ -421,3 +424,69 @@ func TestIsRegexpAll(t *testing.T) { } } } + +func TestMetaQueryMatchTree(t *testing.T) { + d := &indexData{ + repoMetaData: []zoekt.Repository{ + {Name: "r0", Metadata: map[string]string{"license": "Apache-2.0"}}, + {Name: "r1", Metadata: map[string]string{"license": "MIT"}}, + {Name: "r2"}, // no metadata + {Name: "r3", Metadata: map[string]string{"haystack": "needle"}}, + {Name: "r4", Metadata: map[string]string{"note": "test"}}, + }, + fileBranchMasks: []uint64{1, 1, 1, 1, 1}, // 5 docs + repos: []uint16{0, 1, 2, 3, 4}, // map docIDs to repos + docMatchTreeCache: newDocMatchTreeCache(1), // small cache to test eviction + } + + q := &query.Meta{ + Field: "license", + Value: regexp.MustCompile("M.T"), + } + + mt, err := d.newMatchTree(q, matchTreeOpt{}) + if err != nil { + t.Fatalf("failed to build matchTree: %v", 
err) + } + + // Check that the docMatchTree cache is populated correctly + checksum := queryMetaChecksum("license", regexp.MustCompile("M.T")) + cacheKeyField := "Meta" + if _, ok := d.docMatchTreeCache.Get(cacheKeyField, checksum); !ok { + t.Errorf("expected docMatchTreeCache to be populated for key (%q, %q)", cacheKeyField, checksum) + } + + var matched []uint32 + for { + doc := mt.nextDoc() + if doc == math.MaxUint32 { + break + } + matched = append(matched, doc) + mt.prepare(doc) + } + + want := []uint32{1} // only doc from r1 should match + if !reflect.DeepEqual(matched, want) { + t.Errorf("meta match failed: got %v, want %v", matched, want) + } +} + +func Test_queryMetaCacheKey(t *testing.T) { + cases := []struct { + field string + pattern string + wantKey string + }{ + {"metaField", "foo.*bar", "24e88a5ffec04af0"}, + {"metaField", "foo.*baz", "d8d6f6a7f0725b61"}, + {"otherField", "foo.*bar", "c9d07e17c028364"}, + } + for _, tc := range cases { + re := regexp.MustCompile(tc.pattern) + key := queryMetaChecksum(tc.field, re) + if key != tc.wantKey { + t.Errorf("unexpected key for field=%q pattern=%q: got %q, want %q", tc.field, tc.pattern, key, tc.wantKey) + } + } +} diff --git a/merge.go b/index/merge.go similarity index 62% rename from merge.go rename to index/merge.go index 0473663e1..890015982 100644 --- a/merge.go +++ b/index/merge.go @@ -1,4 +1,4 @@ -package zoekt +package index import ( "crypto/sha1" @@ -10,7 +10,7 @@ import ( "runtime" "sort" - "github.com/sourcegraph/zoekt/internal/tenant" + "github.com/sourcegraph/zoekt" ) // Merge files into a compound shard in dstDir. 
Merge returns tmpName and a @@ -50,7 +50,7 @@ func Merge(dstDir string, files ...IndexFile) (tmpName, dstName string, _ error) return tmpName, dstName, nil } -func builderWriteAll(fn string, ib *IndexBuilder) error { +func builderWriteAll(fn string, ib *ShardBuilder) error { dir := filepath.Dir(fn) if err := os.MkdirAll(dir, 0o700); err != nil { return err @@ -89,17 +89,17 @@ func builderWriteAll(fn string, ib *IndexBuilder) error { return nil } -func merge(ds ...*indexData) (*IndexBuilder, error) { +func merge(ds ...*indexData) (*ShardBuilder, error) { if len(ds) == 0 { return nil, fmt.Errorf("need 1 or more indexData to merge") } sort.Slice(ds, func(i, j int) bool { - return ds[i].repoMetaData[0].priority > ds[j].repoMetaData[0].priority + return ds[i].repoMetaData[0].GetPriority() > ds[j].repoMetaData[0].GetPriority() }) - ib := newIndexBuilder() - ib.indexFormatVersion = NextIndexFormatVersion + sb := newShardBuilder(0) + sb.indexFormatVersion = NextIndexFormatVersion for _, d := range ds { lastRepoID := -1 @@ -116,36 +116,88 @@ func merge(ds ...*indexData) (*IndexBuilder, error) { } lastRepoID = repoID + // Initialize repo metadata if it does not already exist. + repo := d.repoMetaData[repoID] + if repo.Metadata == nil { + repo.Metadata = make(map[string]string) + } + // TODO we are losing empty repos on merging since we only get here if // there is an associated document. - if err := ib.setRepository(&d.repoMetaData[repoID]); err != nil { + if err := sb.setRepository(&d.repoMetaData[repoID]); err != nil { return nil, err } } - if err := addDocument(d, ib, repoID, docID); err != nil { + if err := addDocument(d, sb, repoID, docID); err != nil { return nil, err } } } - return ib, nil + return sb, nil } -// Explode takes an IndexFile f and creates 1 simple shard per repository -// contained in f. Explode returns a map of tmpName -> dstName. It is the -// responsibility of the caller to rename the temporary shard(s) and delete the -// input shard. 
-func Explode(dstDir string, f IndexFile) (map[string]string, error) { - return explode(dstDir, f) +// Explode takes an input shard and creates 1 simple shard per repository. It is +// a wrapper around explode that takes care of removing the input shard and +// renaming the temporary shards. +func Explode(dstDir string, inputShard string) error { + f, err := os.Open(inputShard) + if err != nil { + return err + } + defer f.Close() + + indexFile, err := NewIndexFile(f) + if err != nil { + return err + } + defer indexFile.Close() + + exploded, err := explode(dstDir, indexFile) + defer func() { + // best effort removal of tmp files. If os.Remove fails, indexserver will delete + // the leftover tmp files during the next cleanup. + for tmpFn := range exploded { + os.Remove(tmpFn) + } + }() + if err != nil { + return fmt.Errorf("zoekt.Explode: %w", err) + } + + // remove the input shard first to avoid duplicate indexes. In the worst case, + // the process is interrupted just after we delete the compound shard, in which + // case we have to reindex the lost repos. + paths, err := IndexFilePaths(inputShard) + if err != nil { + return err + } + for _, path := range paths { + err = os.Remove(path) + if err != nil { + return err + } + } + + // best effort rename shards. + for tmpFn, dstFn := range exploded { + if err := os.Rename(tmpFn, dstFn); err != nil { + log.Printf("explode: rename failed: %s", err) + } + } + + return nil } -type indexBuilderFunc func(ib *IndexBuilder) +type shardBuilderFunc func(ib *ShardBuilder) -// explode offers a richer signature compared to Explode for testing. You -// probably want to call Explode instead. -func explode(dstDir string, f IndexFile, ibFuncs ...indexBuilderFunc) (map[string]string, error) { +// explode takes an IndexFile f and creates 1 simple shard per repository +// contained in f. explode returns a map of tmpName -> dstName. It is the +// responsibility of the caller to rename the temporary shard(s) and delete the +// input shard. 
+func explode(dstDir string, f IndexFile, ibFuncs ...shardBuilderFunc) (map[string]string, error) { searcher, err := NewSearcher(f) if err != nil { return nil, err @@ -154,28 +206,26 @@ func explode(dstDir string, f IndexFile, ibFuncs ...indexBuilderFunc) (map[strin shardNames := make(map[string]string, len(d.repoMetaData)) - writeShard := func(ib *IndexBuilder) error { + writeShard := func(ib *ShardBuilder) error { if len(ib.repoList) != 1 { - return fmt.Errorf("expected ib to contain exactly 1 repository") + return fmt.Errorf("expected sb to contain exactly 1 repository") } for _, ibFunc := range ibFuncs { ibFunc(ib) } - prefix := "" - if tenant.EnforceTenant() { - prefix = tenant.SrcPrefix(ib.repoList[0].TenantID, ib.repoList[0].ID) - } else { - prefix = ib.repoList[0].Name + opts := Options{ + IndexDir: dstDir, + RepositoryDescription: ib.repoList[0], } - shardName := ShardName(dstDir, prefix, ib.indexFormatVersion, 0) + shardName := opts.shardNameVersion(ib.indexFormatVersion, 0) shardNameTmp := shardName + ".tmp" shardNames[shardNameTmp] = shardName return builderWriteAll(shardNameTmp, ib) } - var ib *IndexBuilder + var sb *ShardBuilder lastRepoID := -1 for docID := uint32(0); int(docID) < len(d.fileBranchMasks); docID++ { repoID := int(d.repos[docID]) @@ -190,27 +240,27 @@ func explode(dstDir string, f IndexFile, ibFuncs ...indexBuilderFunc) (map[strin } lastRepoID = repoID - if ib != nil { - if err := writeShard(ib); err != nil { + if sb != nil { + if err := writeShard(sb); err != nil { return shardNames, err } } - ib = newIndexBuilder() - ib.indexFormatVersion = IndexFormatVersion - if err := ib.setRepository(&d.repoMetaData[repoID]); err != nil { + sb = newShardBuilder(0) + sb.indexFormatVersion = IndexFormatVersion + if err := sb.setRepository(&d.repoMetaData[repoID]); err != nil { return shardNames, err } } - err := addDocument(d, ib, repoID, docID) + err := addDocument(d, sb, repoID, docID) if err != nil { return shardNames, err } } - if ib != nil { - 
if err := writeShard(ib); err != nil { + if sb != nil { + if err := writeShard(sb); err != nil { return shardNames, err } } @@ -218,7 +268,7 @@ func explode(dstDir string, f IndexFile, ibFuncs ...indexBuilderFunc) (map[strin return shardNames, nil } -func addDocument(d *indexData, ib *IndexBuilder, repoID int, docID uint32) error { +func addDocument(d *indexData, ib *ShardBuilder, repoID int, docID uint32) error { doc := Document{ Name: string(d.fileName(docID)), // Content set below since it can return an error @@ -237,7 +287,7 @@ func addDocument(d *indexData, ib *IndexBuilder, repoID int, docID uint32) error return err } - doc.SymbolsMetaData = make([]*Symbol, len(doc.Symbols)) + doc.SymbolsMetaData = make([]*zoekt.Symbol, len(doc.Symbols)) for i := range doc.SymbolsMetaData { doc.SymbolsMetaData[i] = d.symbols.data(d.fileEndSymbol[docID] + uint32(i)) } diff --git a/merge_test.go b/index/merge_test.go similarity index 90% rename from merge_test.go rename to index/merge_test.go index 07abf1ca6..31d92fffa 100644 --- a/merge_test.go +++ b/index/merge_test.go @@ -1,4 +1,4 @@ -package zoekt +package index import ( "os" @@ -6,6 +6,8 @@ import ( "testing" "github.com/google/go-cmp/cmp" + + "github.com/sourcegraph/zoekt" ) // We compare 2 simple shards before and after the transformation @@ -13,12 +15,12 @@ import ( // identical. 
func TestExplode(t *testing.T) { simpleShards := []string{ - "./testdata/shards/repo_v16.00000.zoekt", - "./testdata/shards/repo2_v16.00000.zoekt", + ".././testdata/shards/repo_v16.00000.zoekt", + ".././testdata/shards/repo2_v16.00000.zoekt", } // repo name -> IndexMetadata - m := make(map[string]*IndexMetadata, 2) + m := make(map[string]*zoekt.IndexMetadata, 2) // merge var files []IndexFile @@ -72,7 +74,7 @@ func TestExplode(t *testing.T) { } defer indexFile.Close() - overwriteIndexTimeAndID := func(ib *IndexBuilder) { + overwriteIndexTimeAndID := func(ib *ShardBuilder) { ib.ID = m[ib.repoList[0].Name].ID ib.IndexTime = m[ib.repoList[0].Name].IndexTime } diff --git a/index/postings_bench_test.go b/index/postings_bench_test.go new file mode 100644 index 000000000..e40b910ef --- /dev/null +++ b/index/postings_bench_test.go @@ -0,0 +1,150 @@ +package index + +import ( + "bytes" + "fmt" + "io/fs" + "os" + "path/filepath" + "testing" +) + +// Set ZOEKT_BENCH_REPO to a source tree (e.g. a kubernetes checkout) to enable. +// +// git clone --depth=1 https://github.com/kubernetes/kubernetes /tmp/k8s +// ZOEKT_BENCH_REPO=/tmp/k8s go test ./index/ -bench=BenchmarkPostings -benchmem -count=5 -timeout=600s + +func requireBenchRepo(b *testing.B) string { + b.Helper() + dir := os.Getenv("ZOEKT_BENCH_REPO") + if dir == "" { + b.Skip("ZOEKT_BENCH_REPO not set") + } + return dir +} + +// loadRepoFiles walks dir and returns file contents, skipping binary files, +// empty files, and anything over 1 MB. Returns at most maxFiles entries. 
+func loadRepoFiles(b *testing.B, dir string, maxFiles int) [][]byte { + b.Helper() + var files [][]byte + err := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return nil + } + if d.IsDir() { + switch d.Name() { + case ".git", "vendor", "node_modules": + return filepath.SkipDir + } + return nil + } + if len(files) >= maxFiles { + return filepath.SkipAll + } + info, err := d.Info() + if err != nil || info.Size() == 0 || info.Size() > 1<<20 { + return nil + } + data, err := os.ReadFile(path) + if err != nil { + return nil + } + if bytes.IndexByte(data, 0) >= 0 { + return nil // binary + } + files = append(files, data) + return nil + }) + if err != nil { + b.Fatalf("walking repo: %v", err) + } + if len(files) == 0 { + b.Fatal("no files found in repo") + } + return files +} + +func totalSize(files [][]byte) int64 { + var n int64 + for _, f := range files { + n += int64(len(f)) + } + return n +} + +// BenchmarkPostings_NewSearchableString measures the core hot path: trigram +// extraction, map lookups, delta encoding, and per-trigram slice growth. +// Sub-benchmarks vary corpus size to show scaling with map size. 
+func BenchmarkPostings_NewSearchableString(b *testing.B) { + dir := requireBenchRepo(b) + allFiles := loadRepoFiles(b, dir, 50_000) + b.Logf("loaded %d files, %.1f MB", len(allFiles), float64(totalSize(allFiles))/(1<<20)) + + for _, n := range []int{1_000, 5_000, len(allFiles)} { + n = min(n, len(allFiles)) + files := allFiles[:n] + size := totalSize(files) + + b.Run(fmt.Sprintf("files=%d", n), func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + pb := newPostingsBuilder(defaultShardMax) + for _, data := range files { + _, _, _ = pb.newSearchableString(data, nil) + } + } + b.ReportMetric(float64(size), "input-bytes/op") + }) + } +} + +// BenchmarkPostings_Reuse measures the warm path: building postings with a +// reset (pooled) postingsBuilder that retains its map and slice allocations +// from a previous shard build. +func BenchmarkPostings_Reuse(b *testing.B) { + dir := requireBenchRepo(b) + allFiles := loadRepoFiles(b, dir, 50_000) + size := totalSize(allFiles) + b.Logf("loaded %d files, %.1f MB", len(allFiles), float64(size)/(1<<20)) + + // Warm up the builder so it has allocated map entries and slices. + pb := newPostingsBuilder(defaultShardMax) + for _, data := range allFiles { + _, _, _ = pb.newSearchableString(data, nil) + } + + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + pb.reset() + for _, data := range allFiles { + _, _, _ = pb.newSearchableString(data, nil) + } + } + b.ReportMetric(float64(size), "input-bytes/op") +} + +// BenchmarkPostings_WritePostings measures the marshaling path: sorting ngram +// keys and writing varint-encoded posting lists. 
+func BenchmarkPostings_WritePostings(b *testing.B) { + dir := requireBenchRepo(b) + allFiles := loadRepoFiles(b, dir, 50_000) + + pb := newPostingsBuilder(defaultShardMax) + for _, data := range allFiles { + _, _, _ = pb.newSearchableString(data, nil) + } + b.Logf("built %d unique ngrams from %d files, %.1f MB", len(pb.postings), len(allFiles), float64(totalSize(allFiles))/(1<<20)) + + buf := &bytes.Buffer{} + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + buf.Reset() + w := &writer{w: buf} + var ngramText, charOffsets, endRunes simpleSection + var postings compoundSection + writePostings(w, pb, &ngramText, &charOffsets, &postings, &endRunes) + } +} diff --git a/read.go b/index/read.go similarity index 81% rename from read.go rename to index/read.go index 12cb32eb4..3a9c05b91 100644 --- a/read.go +++ b/index/read.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "encoding/binary" @@ -24,7 +24,12 @@ import ( "slices" "sort" + "github.com/RoaringBitmap/roaring" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" "github.com/rs/xid" + + "github.com/sourcegraph/zoekt" ) // IndexFile is a file suitable for concurrent read access. For performance @@ -231,7 +236,7 @@ func readSectionU64(f IndexFile, sec simpleSection) ([]uint64, error) { return arr, nil } -func (r *reader) readJSON(data interface{}, sec simpleSection) error { +func (r *reader) readJSON(data any, sec simpleSection) error { blob, err := r.r.Read(sec.off, sec.sz) if err != nil { return err @@ -242,7 +247,7 @@ func (r *reader) readJSON(data interface{}, sec simpleSection) error { // canReadVersion returns checks if zoekt can read in md. If it can't a // non-nil error is returned. 
-func canReadVersion(md *IndexMetadata) bool { +func canReadVersion(md *zoekt.IndexMetadata) bool { // Backwards compatible with v16 return md.IndexFormatVersion == IndexFormatVersion || md.IndexFormatVersion == NextIndexFormatVersion } @@ -252,6 +257,10 @@ func (r *reader) readIndexData(toc *indexTOC) (*indexData, error) { file: r.r, branchIDs: []map[string]uint{}, branchNames: []map[uint]string{}, + + // docMatchTreeCache is disabled by default. + // The number of max entries can be set with environment variable ZOEKT_DOCMATCHTREE_CACHE + docMatchTreeCache: newDocMatchTreeCache(0), } repos, md, err := r.parseMetadata(toc.metaData, toc.repoMetaData) @@ -262,7 +271,7 @@ func (r *reader) readIndexData(toc *indexTOC) (*indexData, error) { } d.metaData = *md - d.repoMetaData = make([]Repository, 0, len(repos)) + d.repoMetaData = make([]zoekt.Repository, 0, len(repos)) for _, r := range repos { d.repoMetaData = append(d.repoMetaData, *r) } @@ -311,6 +320,11 @@ func (r *reader) readIndexData(toc *indexTOC) (*indexData, error) { return nil, err } + d.categories, err = d.readSectionBlob(toc.categories) + if err != nil { + return nil, err + } + d.contentNgrams, err = d.newBtreeIndex(toc.ngramText, toc.postings) if err != nil { return nil, err @@ -373,7 +387,7 @@ func (r *reader) readIndexData(toc *indexTOC) (*indexData, error) { d.fileNameRuneOffsets = makeRuneOffsetMap(fileNameRuneOffsets) d.subRepoPaths = make([][]string, 0, len(d.repoMetaData)) - for i := 0; i < len(d.repoMetaData); i++ { + for i := range d.repoMetaData { keys := make([]string, 0, len(d.repoMetaData[i].SubRepoMap)+1) keys = append(keys, "") for k := range d.repoMetaData[i].SubRepoMap { @@ -412,8 +426,8 @@ func (r *reader) readIndexData(toc *indexTOC) (*indexData, error) { return &d, nil } -func (r *reader) parseMetadata(metaData simpleSection, repoMetaData simpleSection) ([]*Repository, *IndexMetadata, error) { - var md IndexMetadata +func (r *reader) parseMetadata(metaData simpleSection, repoMetaData 
simpleSection) ([]*zoekt.Repository, *zoekt.IndexMetadata, error) { + var md zoekt.IndexMetadata if err := r.readJSON(&md, metaData); err != nil { return nil, nil, err } @@ -433,13 +447,13 @@ func (r *reader) parseMetadata(metaData simpleSection, repoMetaData simpleSectio } } - var repos []*Repository + var repos []*zoekt.Repository if md.IndexFormatVersion >= 17 { if err := json.Unmarshal(blob, &repos); err != nil { return nil, &md, err } } else { - repos = make([]*Repository, 1) + repos = make([]*zoekt.Repository, 1) if err := json.Unmarshal(blob, &repos[0]); err != nil { return nil, &md, err } @@ -533,7 +547,14 @@ func (d *indexData) readNewlines(i uint32, buf []uint32) ([]uint32, uint32, erro return nil, 0, err } - return fromSizedDeltas(blob, buf), sec.sz, nil + nl := fromSizedDeltas(blob, buf) + + // can be nil if buf is nil and there are no doc sections. However, we rely + // on it being non-nil to cache the read. + if nl == nil { + nl = make([]uint32, 0) + } + return nl, sec.sz, nil } func (d *indexData) readDocSections(i uint32, buf []DocumentSection) ([]DocumentSection, uint32, error) { @@ -561,7 +582,7 @@ func (d *indexData) readDocSections(i uint32, buf []DocumentSection) ([]Document // results coming from this searcher are valid only for the lifetime // of the Searcher itself, ie. []byte members should be copied into // fresh buffers if the result is to survive closing the shard. -func NewSearcher(r IndexFile) (Searcher, error) { +func NewSearcher(r IndexFile) (zoekt.Searcher, error) { rd := &reader{r: r} var toc indexTOC @@ -578,7 +599,7 @@ func NewSearcher(r IndexFile) (Searcher, error) { // ReadMetadata returns the metadata of index shard without reading // the index data. The IndexFile is not closed. 
-func ReadMetadata(inf IndexFile) ([]*Repository, *IndexMetadata, error) { +func ReadMetadata(inf IndexFile) ([]*zoekt.Repository, *zoekt.IndexMetadata, error) { rd := &reader{r: inf} var toc indexTOC err := rd.readTOCSections(&toc, []string{"metaData", "repoMetaData"}) @@ -590,7 +611,7 @@ func ReadMetadata(inf IndexFile) ([]*Repository, *IndexMetadata, error) { // ReadMetadataPathAlive is like ReadMetadataPath except that it only returns // alive repositories. -func ReadMetadataPathAlive(p string) ([]*Repository, *IndexMetadata, error) { +func ReadMetadataPathAlive(p string) ([]*zoekt.Repository, *zoekt.IndexMetadata, error) { repos, id, err := ReadMetadataPath(p) if err != nil { return nil, nil, err @@ -607,7 +628,7 @@ func ReadMetadataPathAlive(p string) ([]*Repository, *IndexMetadata, error) { // ReadMetadataPath returns the metadata of index shard at p without reading // the index data. ReadMetadataPath is a helper for ReadMetadata which opens // the IndexFile at p. -func ReadMetadataPath(p string) ([]*Repository, *IndexMetadata, error) { +func ReadMetadataPath(p string) ([]*zoekt.Repository, *zoekt.IndexMetadata, error) { f, err := os.Open(p) if err != nil { return nil, nil, err @@ -641,6 +662,102 @@ func IndexFilePaths(p string) ([]string, error) { return exist, nil } +// maybeContainsRepo is a performance optimization mainly intended to be used by +// containsRepo to avoid unmarshalling large metadata files for compound shards. +// It is best-effort, so if it encounters any error returns true (ie indicating +// you need to do more checks). +func maybeContainsRepo(inf IndexFile, repoID uint32) bool { + rd := &reader{r: inf} + var toc indexTOC + err := rd.readTOCSections(&toc, []string{"reposIDsBitmap"}) + if err != nil { + return true + } + + // shard does not yet contain reposIDsBitmap so we can't tell if it contains + // repo. 
+ if toc.reposIDsBitmap.sz == 0 { + return true + } + + blob, err := inf.Read(toc.reposIDsBitmap.off, toc.reposIDsBitmap.sz) + if err != nil { + return true + } + + var rb roaring.Bitmap + _, err = rb.FromUnsafeBytes(blob) + if err != nil { + return true + } + + return rb.Contains(repoID) +} + +var metricCompoundShardLookups = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "zoekt_compound_shard_lookups", + Help: "Number of compound shard lookups and how much work was done.", +}, []string{"state"}) + +// containsRepo returns true if the shard at path contains a repo with id. The +// function returns false if the shard does not contain the repo or if it +// encounters an error. +func containsRepo(p string, id uint32) bool { + var err error + earlyReturn := false + + defer func() { + if err != nil { + metricCompoundShardLookups.WithLabelValues("error").Inc() + return + } + if earlyReturn { + metricCompoundShardLookups.WithLabelValues("skipped").Inc() + return + } + metricCompoundShardLookups.WithLabelValues("full_lookup").Inc() + }() + + f, err := os.Open(p) + if err != nil { + return false + } + defer f.Close() + + inf, err := NewIndexFile(f) + if err != nil { + return false + } + defer inf.Close() + + // PERF: Looping over repos can be relatively slow on instances with thousands + // of tiny repos in compound shards. This is a much faster check to see if we + // need to do more work. + // + // If we are still seeing performance issues, we should consider adding + // some sort of global oracle here to avoid filepath.Glob and checking + // each compound shard. 
+ if !maybeContainsRepo(inf, id) { + earlyReturn = true + return false + } + + repos, _, err := ReadMetadata(inf) + if err != nil { + return false + } + for _, repo := range repos { + if repo.Tombstone { + continue + } + if repo.ID == id { + return true + } + } + + return false +} + func loadIndexData(r IndexFile) (*indexData, error) { rd := &reader{r: r} diff --git a/read_test.go b/index/read_test.go similarity index 93% rename from read_test.go rename to index/read_test.go index 7b3827b1d..087b8a65b 100644 --- a/read_test.go +++ b/index/read_test.go @@ -12,13 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "bytes" "context" "encoding/json" - "flag" "fmt" "io/fs" "os" @@ -31,15 +30,14 @@ import ( "github.com/google/go-cmp/cmp" + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/query" ) -var update = flag.Bool("update", false, "update golden files") - func TestReadWrite(t *testing.T) { - b, err := NewIndexBuilder(nil) + b, err := NewShardBuilder(nil) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } if err := b.AddFile("filename", []byte("abcde")); err != nil { @@ -82,9 +80,9 @@ func TestReadWrite(t *testing.T) { } func TestReadWriteNames(t *testing.T) { - b, err := NewIndexBuilder(nil) + b, err := NewShardBuilder(nil) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } if err := b.AddFile("abCd", []byte("")); err != nil { @@ -126,9 +124,9 @@ func TestReadWriteNames(t *testing.T) { } func TestGet(t *testing.T) { - b, err := NewIndexBuilder(nil) + b, err := NewShardBuilder(nil) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } if err := b.AddFile("file_name", []byte("aaa bbbaaa")); err != nil { @@ -199,7 +197,7 @@ func TestGet(t *testing.T) { } } -func loadShard(fn string) (Searcher, error) { +func loadShard(fn 
string) (zoekt.Searcher, error) { f, err := os.Open(fn) if err != nil { return nil, err @@ -222,7 +220,7 @@ func TestReadSearch(t *testing.T) { type out struct { FormatVersion int FeatureVersion int - FileMatches [][]FileMatch + FileMatches [][]zoekt.FileMatch } qs := []query.Q{ @@ -259,7 +257,7 @@ func TestReadSearch(t *testing.T) { FeatureVersion: index.metaData.IndexFeatureVersion, } for _, q := range qs { - res, err := shard.Search(context.Background(), q, &SearchOptions{}) + res, err := shard.Search(context.Background(), q, &zoekt.SearchOptions{}) if err != nil { t.Fatalf("failed search %s on %s during updating: %v", q, name, err) } @@ -291,7 +289,7 @@ func TestReadSearch(t *testing.T) { } for j, q := range qs { - res, err := shard.Search(context.Background(), q, &SearchOptions{}) + res, err := shard.Search(context.Background(), q, &zoekt.SearchOptions{}) if err != nil { t.Fatalf("failed search %s on %s: %v", q, name, err) } @@ -356,9 +354,9 @@ func TestEncodeRawConfig(t *testing.T) { func TestBackwardsCompat(t *testing.T) { if *update { - b, err := NewIndexBuilder(nil) + b, err := NewShardBuilder(nil) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } if err := b.AddFile("filename", []byte("abcde")); err != nil { @@ -370,7 +368,13 @@ func TestBackwardsCompat(t *testing.T) { t.Fatal(err) } - outName := ShardName("testdata/backcompat", "new", IndexFormatVersion, 0) + opts := Options{ + IndexDir: "testdata/backcompat", + RepositoryDescription: zoekt.Repository{ + Name: "new", + }, + } + outName := opts.shardName(0) t.Log("writing new file", outName) err = os.WriteFile(outName, buf.Bytes(), 0o644) diff --git a/index/score.go b/index/score.go new file mode 100644 index 000000000..0bfdde2cf --- /dev/null +++ b/index/score.go @@ -0,0 +1,412 @@ +// Copyright 2016 Google Inc. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package index + +import ( + "bytes" + "fmt" + "math" + "strings" + + "github.com/go-enry/go-enry/v2" + + "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/internal/ctags" +) + +const ( + ScoreOffset = 10_000_000 +) + +type chunkScore struct { + score float64 + debugScore string + bestLine int +} + +// scoreChunk calculates the score for each line in the chunk based on its candidate matches, and returns the score of +// the best-scoring line, along with its line number. +// Invariant: there should be at least one input candidate, len(ms) > 0. +func (p *contentProvider) scoreChunk(ms []*candidateMatch, language string, opts *zoekt.SearchOptions) (chunkScore, []*zoekt.Symbol) { + nl := p.newlines() + + var bestScore lineScore + bestLine := 0 + var symbolInfo []*zoekt.Symbol + + start := 0 + currentLine := -1 + for i, m := range ms { + lineNumber := -1 + if !m.fileName { + lineNumber = nl.atOffset(m.byteOffset) + } + + // If this match represents a new line, then score the previous line and update 'start'. + if i != 0 && lineNumber != currentLine { + score, si := p.scoreLine(ms[start:i], language, currentLine, opts) + symbolInfo = append(symbolInfo, si...) 
+ if score.score > bestScore.score { + bestScore = score + bestLine = currentLine + } + start = i + } + currentLine = lineNumber + } + + // Make sure to score the last line + line, si := p.scoreLine(ms[start:], language, currentLine, opts) + symbolInfo = append(symbolInfo, si...) + if line.score > bestScore.score { + bestScore = line + bestLine = currentLine + } + + cs := chunkScore{ + score: bestScore.score, + bestLine: bestLine, + } + if opts.DebugScore { + cs.debugScore = fmt.Sprintf("%s, (line: %d)", bestScore.debugScore, bestLine) + } + return cs, symbolInfo +} + +type lineScore struct { + score float64 + debugScore string +} + +// scoreLine calculates a score for the line based on its candidate matches. +// Invariants: +// - All candidate matches are assumed to come from the same line in the content. +// - If this line represents a filename, then lineNumber must be -1. +// - There should be at least one input candidate, len(ms) > 0. +func (p *contentProvider) scoreLine(ms []*candidateMatch, language string, lineNumber int, opts *zoekt.SearchOptions) (lineScore, []*zoekt.Symbol) { + if opts.UseBM25Scoring { + score, symbolInfo := p.scoreLineBM25(ms, lineNumber) + ls := lineScore{score: score} + if opts.DebugScore { + ls.debugScore = fmt.Sprintf("tfScore:%.2f, ", score) + } + return ls, symbolInfo + } + + score := 0.0 + what := "" + addScore := func(w string, s float64) { + if s != 0 && opts.DebugScore { + what += fmt.Sprintf("%s:%.2f, ", w, s) + } + score += s + } + + filename := p.data(true) + var symbolInfo []*zoekt.Symbol + + var bestScore lineScore + for i, m := range ms { + data := p.data(m.fileName) + + endOffset := m.byteOffset + m.byteMatchSz + startBoundary := m.byteOffset < uint32(len(data)) && (m.byteOffset == 0 || byteClass(data[m.byteOffset-1]) != byteClass(data[m.byteOffset])) + endBoundary := endOffset > 0 && (endOffset == uint32(len(data)) || byteClass(data[endOffset-1]) != byteClass(data[endOffset])) + + score = 0 + what = "" + + if 
startBoundary && endBoundary { + addScore("WordMatch", scoreWordMatch) + } else if startBoundary || endBoundary { + addScore("PartialWordMatch", scorePartialWordMatch) + } + + if m.fileName { + sep := bytes.LastIndexByte(data, '/') + startMatch := int(m.byteOffset) == sep+1 + endMatch := endOffset == uint32(len(data)) + if startMatch && endMatch { + addScore("Base", scoreBase) + } else if startMatch || endMatch { + addScore("EdgeBase", (scoreBase+scorePartialBase)/2) + } else if sep < int(m.byteOffset) { + addScore("InnerBase", scorePartialBase) + } + } else if sec, si, ok := p.findSymbol(m); ok { + startMatch := sec.Start == m.byteOffset + endMatch := sec.End == endOffset + if startMatch && endMatch { + addScore("Symbol", scoreSymbol) + } else if startMatch || endMatch { + addScore("EdgeSymbol", (scoreSymbol+scorePartialSymbol)/2) + } else { + addScore("OverlapSymbol", scorePartialSymbol) + } + + // Score based on symbol data + if si != nil { + symbolKind := ctags.ParseSymbolKind(si.Kind) + sym := sectionSlice(data, sec) + + addScore(fmt.Sprintf("kind:%s:%s", language, si.Kind), scoreSymbolKind(language, filename, sym, symbolKind)) + + // This is from a symbol tree, so we need to store the symbol + // information. + if m.symbol { + if symbolInfo == nil { + symbolInfo = make([]*zoekt.Symbol, len(ms)) + } + // findSymbols does not hydrate in Sym. So we need to store it. 
+ si.Sym = string(sym) + symbolInfo[i] = si + } + } + } + + // scoreWeight != 1 means it affects score + if !epsilonEqualsOne(m.scoreWeight) { + score = score * m.scoreWeight + if opts.DebugScore { + what += fmt.Sprintf("boost:%.2f, ", m.scoreWeight) + } + } + + if score > bestScore.score { + bestScore.score = score + bestScore.debugScore = what + } + } + + if opts.DebugScore { + bestScore.debugScore = fmt.Sprintf("score:%.2f <- %s", bestScore.score, strings.TrimSuffix(bestScore.debugScore, ", ")) + } + + return bestScore, symbolInfo +} + +// scoreLineBM25 computes the score of a line according to BM25, the most common scoring algorithm for text search: +// https://en.wikipedia.org/wiki/Okapi_BM25. Compared to the standard scoreLine algorithm, this score rewards multiple +// term matches on a line. +// Notes: +// - This BM25 calculation skips inverse document frequency (idf) to keep the implementation simple. +// - It uses the same calculateTermFrequency method as BM25 file scoring, which boosts filename and symbol matches. +func (p *contentProvider) scoreLineBM25(ms []*candidateMatch, lineNumber int) (float64, []*zoekt.Symbol) { + // If this is a filename, then don't compute BM25. The score would not be comparable to line scores. + if lineNumber < 0 { + return 0, nil + } + + // Use standard parameter defaults used in Lucene (https://lucene.apache.org/core/10_1_0/core/org/apache/lucene/search/similarities/BM25Similarity.html) + k, b := 1.2, 0.75 + + // Calculate the length ratio of this line. As a heuristic, we assume an average line length of 100 characters. + // Usually the calculation would be based on terms, but using bytes should work fine, as we're just computing a ratio. 
+ nl := p.newlines() + lineLength := nl.lineStart(lineNumber+1) - nl.lineStart(lineNumber) + L := float64(lineLength) / 100.0 + + score := 0.0 + tfs := p.calculateTermFrequency(ms, false) // ignore file priority, since we're just scoring within a single file + for _, f := range tfs { + score += tfScore(k, b, L, f) + } + + // Check if any index comes from a symbol match tree, and if so hydrate in symbol information + var symbolInfo []*zoekt.Symbol + for _, m := range ms { + if m.symbol { + if sec, si, ok := p.findSymbol(m); ok && si != nil { + // findSymbols does not hydrate in Sym. So we need to store it. + sym := sectionSlice(p.data(false), sec) + si.Sym = string(sym) + symbolInfo = append(symbolInfo, si) + } + } + } + + score = boostScore(score, ms) + return score, symbolInfo +} + +// tfScore is the term frequency score for BM25. +func tfScore(k float64, b float64, L float64, f int) float64 { + return ((k + 1.0) * float64(f)) / (k*(1.0-b+b*L) + float64(f)) +} + +const importantTermBoost = 5 +const lowPriorityFilePenalty = 5 + +// calculateTermFrequency computes the term frequency for the file match. +// Notes: +// - Filename matches count more than content matches. This mimics a common text search strategy to 'boost' matches on document titles. +// - Symbol matches also count more than content matches, to reward matches on symbol definitions. +// - "Low priority" files like tests, generated files, etc. have their term frequency down-weighted, to prioritize matches from 'regular' files +func (p *contentProvider) calculateTermFrequency(cands []*candidateMatch, lowPriority bool) map[string]int { + // Treat each candidate match as a term and compute the frequencies. For now, ignore case sensitivity and + // ignore whether the index is a word boundary. 
+ termFreqs := map[string]int{} + for _, m := range cands { + term := string(m.substrLowered) + if m.fileName || p.matchesSymbol(m) { + termFreqs[term] += importantTermBoost + } else { + termFreqs[term]++ + } + } + + // If a file is a test, generated, etc., then down-weight its term frequency. The BM25F interpretation + // is that this data lives in a separate 'field' that is half the priority of regular content. + if lowPriority { + for term := range termFreqs { + termFreqs[term] = termFreqs[term] / lowPriorityFilePenalty + } + } + + return termFreqs +} + +// boostScore finds whether any of the matches are part of a boosted match tree, then applies +// the boost to the final score. This follows precedent in other search engines like Lucene, where +// boosts multiply an entire query clause's final score. +// +// As a heuristic, we use the maximum boost across matches to avoid applying the same boost multiple times. +func boostScore(score float64, ms []*candidateMatch) float64 { + maxScoreWeight := 1.0 + for _, m := range ms { + if m.scoreWeight > maxScoreWeight { + maxScoreWeight = m.scoreWeight + } + } + + if !epsilonEqualsOne(maxScoreWeight) { + score = score * maxScoreWeight + } + return score +} + +// scoreFile computes a score for the file match using various scoring signals, like +// whether there's an exact match on a symbol, the number of query clauses that matched, etc. +func (d *indexData) scoreFile(fileMatch *zoekt.FileMatch, doc uint32, mt matchTree, known map[matchTree]bool, opts *zoekt.SearchOptions) { + atomMatchCount := 0 + visitMatchAtoms(mt, known, func(mt matchTree) { + atomMatchCount++ + }) + + addScore := func(what string, computed float64) { + fileMatch.AddScore(what, computed, -1, opts.DebugScore) + } + + // atom-count boosts files with matches from more than 1 atom. The + // maximum boost is scoreFactorAtomMatch. 
+ if atomMatchCount > 0 { + fileMatch.AddScore("atom", (1.0-1.0/float64(atomMatchCount))*scoreFactorAtomMatch, float64(atomMatchCount), opts.DebugScore) + } + + maxFileScore := 0.0 + for i := range fileMatch.LineMatches { + if maxFileScore < fileMatch.LineMatches[i].Score { + maxFileScore = fileMatch.LineMatches[i].Score + } + + // Order by ordering in file. + fileMatch.LineMatches[i].Score += scoreLineOrderFactor * (1.0 - (float64(i) / float64(len(fileMatch.LineMatches)))) + } + + for i := range fileMatch.ChunkMatches { + if maxFileScore < fileMatch.ChunkMatches[i].Score { + maxFileScore = fileMatch.ChunkMatches[i].Score + } + + // Order by ordering in file. + fileMatch.ChunkMatches[i].Score += scoreLineOrderFactor * (1.0 - (float64(i) / float64(len(fileMatch.ChunkMatches)))) + } + + // Maintain ordering of input files. This strictly dominates the in-file ordering of the matches. + addScore("fragment", maxFileScore) + + // Truncate score to avoid overlap with the tiebreakers. + fileMatch.Score = math.Trunc(fileMatch.Score) + + // Add tiebreakers + repoRank := d.repoMetaData[d.repos[doc]].Rank // [0, 65535] + docOrderScore := 1.0 - float64(doc)/float64(len(d.boundaries)) // [0, 1] + + if opts.DebugScore { + // We log the score components individually for better readability. + fileMatch.Debug = fmt.Sprintf("score: %d (repo-rank: %d, file-rank: %.2f) <- %s", int(fileMatch.Score), repoRank, docOrderScore, strings.TrimSuffix(fileMatch.Debug, ", ")) + } + + fileMatch.Score = ScoreOffset*fileMatch.Score + scoreRepoRankFactor*float64(repoRank) + scoreFileOrderFactor*docOrderScore +} + +// scoreFileBM25 computes the score according to BM25, the most common scoring algorithm for text search: +// https://en.wikipedia.org/wiki/Okapi_BM25. Note that we treat the inverse document frequency (idf) as constant. 
This +// is supported by our evaluations which showed that for keyword style queries, idf can down-weight the score of some +// keywords too much, leading to a worse ranking. The intuition is that each keyword is important independently of how +// frequent it appears in the corpus. +// +// Unlike standard file scoring, this scoring strategy ignores the individual LineMatch and ChunkMatch scores, instead +// calculating a score over all matches in the file. +func (d *indexData) scoreFileBM25(fileMatch *zoekt.FileMatch, doc uint32, cands []*candidateMatch, cp *contentProvider, opts *zoekt.SearchOptions) { + lowPriority := d.isLowPriority(fileMatch, doc) + tf := cp.calculateTermFrequency(cands, lowPriority) + + // Use standard parameter defaults used in Lucene (https://lucene.apache.org/core/10_1_0/core/org/apache/lucene/search/similarities/BM25Similarity.html) + k, b := 1.2, 0.75 + + averageFileLength := float64(d.boundaries[d.numDocs()]) / float64(d.numDocs()) + // This is very unlikely, but explicitly guard against division by zero. + if averageFileLength == 0 { + averageFileLength++ + } + + // Compute the file length ratio. Usually the calculation would be based on terms, but using + // bytes should work fine, as we're just computing a ratio. 
+ fileLength := float64(d.boundaries[doc+1] - d.boundaries[doc]) + + L := fileLength / averageFileLength + + bm25Score := 0.0 + sumTF := 0 // Just for debugging + for _, f := range tf { + sumTF += f + bm25Score += tfScore(k, b, L, f) + } + + score := boostScore(bm25Score, cands) + boosted := score != bm25Score + fileMatch.Score = score + + if opts.DebugScore { + // To make the debug output easier to read, we split the score into the query dependent score and the tiebreaker + fileMatch.Debug = fmt.Sprintf("bm25-score: %.2f (low-priority: %t) <- sum-termFrequencies: %d, length-ratio: %.2f", score, lowPriority, sumTF, L) + if boosted { + fileMatch.Debug += fmt.Sprintf(" (boosted)") + } + } +} + +func (d *indexData) isLowPriority(fileMatch *zoekt.FileMatch, doc uint32) bool { + category := d.getCategory(doc) + if category != FileCategoryMissing { + return category.lowPriority() + } else { + // The category may be missing from older index versions. In this case, + // perform a cheap, best-effort check against the filename. + path := fileMatch.FileName + return enry.IsTest(path) || enry.IsVendor(path) + } +} diff --git a/section.go b/index/section.go similarity index 99% rename from section.go rename to index/section.go index 380054f8f..02fee05eb 100644 --- a/section.go +++ b/index/section.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index import ( "encoding/binary" diff --git a/indexbuilder.go b/index/shard_builder.go similarity index 60% rename from indexbuilder.go rename to index/shard_builder.go index 027edf9f4..80051e32b 100644 --- a/indexbuilder.go +++ b/index/shard_builder.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package zoekt +package index import ( "bytes" @@ -23,13 +23,15 @@ import ( "net/url" "os" "path/filepath" + "slices" "sort" "strings" "text/template" "time" "unicode/utf8" - "github.com/sourcegraph/zoekt/internal/languages" + "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/languages" ) var _ = log.Println @@ -57,9 +59,43 @@ func HostnameBestEffort() string { // Store character (unicode codepoint) offset (in bytes) this often. const runeOffsetFrequency = 100 +// postingList holds the varint-encoded delta data and last offset for a +// single ngram. Stored by pointer in the asciiPostings array or the +// postings map so appending to data does not require rewriting the +// map entry or array slot. +type postingList struct { + data []byte + lastOff uint32 +} + +// asciiNgramBits is the number of bits needed to index all ASCII trigrams. +// ASCII runes are 0-127 (7 bits), so 3 runes = 21 bits = 2M entries. +const asciiNgramBits = 21 + +// asciiNgramIndex packs three ASCII bytes into a 21-bit array index. +func asciiNgramIndex(a, b, c byte) uint32 { + return uint32(a)<<14 | uint32(b)<<7 | uint32(c) +} + +// asciiIndexToNgram converts a 21-bit ASCII array index back to the +// canonical ngram encoding (rune[0]<<42 | rune[1]<<21 | rune[2]). +func asciiIndexToNgram(idx uint32) ngram { + r0 := uint64(idx >> 14) + r1 := uint64((idx >> 7) & 0x7f) + r2 := uint64(idx & 0x7f) + return ngram(r0<<42 | r1<<21 | r2) +} + type postingsBuilder struct { - postings map[ngram][]byte - lastOffsets map[ngram]uint32 + // ASCII trigrams use direct-indexed array (zero hash/probe cost). + // Non-ASCII trigrams fall back to the map. + asciiPostings [1 << asciiNgramBits]*postingList + postings map[ngram]*postingList + + // asciiPopulated tracks which indices in asciiPostings are non-nil, + // so reset() and writePostings iterate only populated slots — O(n) + // where n is unique ASCII trigrams (~275K) instead of O(2M). 
// estimateNgrams returns a pre-size hint for the non-ASCII postings map,
// derived from the maximum shard content size. Intentionally over-estimates
// (the map only holds non-ASCII trigrams) to avoid rehashing.
//
// The hint is shardMaxBytes/600 and never less than 1024, so zero or negative
// sizes yield 1024.
func estimateNgrams(shardMaxBytes int) int {
	return max(shardMaxBytes/600, 1024)
}
+func (s *postingsBuilder) reset() { + for _, idx := range s.asciiPopulated { + pl := s.asciiPostings[idx] + pl.data = pl.data[:0] + pl.lastOff = 0 + } + s.asciiPopulated = s.asciiPopulated[:0] + for _, pl := range s.postings { + pl.data = pl.data[:0] + pl.lastOff = 0 + } + s.runeOffsets = s.runeOffsets[:0] + s.runeCount = 0 + s.isPlainASCII = true + s.endRunes = s.endRunes[:0] + s.endByte = 0 +} + // Store trigram offsets for the given UTF-8 data. The // DocumentSections must correspond to rune boundaries in the UTF-8 // data. @@ -104,8 +182,14 @@ func (s *postingsBuilder) newSearchableString(data []byte, byteSections []Docume endRune := s.runeCount for ; len(data) > 0; runeIndex++ { - c, sz := utf8.DecodeRune(data) - if sz > 1 { + // ASCII fast path: avoid utf8.DecodeRune call overhead. + // For source code, 95-99% of bytes are ASCII. + var c rune + sz := 1 + if data[0] < utf8.RuneSelf { + c = rune(data[0]) + } else { + c, sz = utf8.DecodeRune(data) s.isPlainASCII = false } data = data[sz:] @@ -127,13 +211,40 @@ func (s *postingsBuilder) newSearchableString(data []byte, byteSections []Docume continue } - ng := runesToNGram(runeGram) - lastOff := s.lastOffsets[ng] newOff := endRune + uint32(runeIndex) - 2 - m := binary.PutUvarint(buf[:], uint64(newOff-lastOff)) - s.postings[ng] = append(s.postings[ng], buf[:m]...) - s.lastOffsets[ng] = newOff + // ASCII trigrams use direct-indexed array (no hash/probe). + var pl *postingList + if runeGram[0] < utf8.RuneSelf && runeGram[1] < utf8.RuneSelf && runeGram[2] < utf8.RuneSelf { + idx := asciiNgramIndex(byte(runeGram[0]), byte(runeGram[1]), byte(runeGram[2])) + pl = s.asciiPostings[idx] + if pl == nil { + pl = &postingList{data: make([]byte, 0, initialPostingCap)} + s.asciiPostings[idx] = pl + s.asciiPopulated = append(s.asciiPopulated, idx) + } else if len(pl.data) == 0 { + // Retained from a previous shard (pool reuse) — re-record + // in asciiPopulated for this shard's writePostings. 
+ s.asciiPopulated = append(s.asciiPopulated, idx) + } + } else { + ng := runesToNGram(runeGram) + pl = s.postings[ng] + if pl == nil { + pl = &postingList{data: make([]byte, 0, initialPostingCap)} + s.postings[ng] = pl + } + } + delta := uint64(newOff - pl.lastOff) + if delta < 0x80 { + // Single-byte varint fast path: ~80% of deltas are < 128. + // append(slice, byte) is cheaper than append(slice, slice...). + pl.data = append(pl.data, byte(delta)) + } else { + m := binary.PutUvarint(buf[:], delta) + pl.data = append(pl.data, buf[:m]...) + } + pl.lastOff = newOff } s.runeCount += runeIndex @@ -162,8 +273,8 @@ func (s *postingsBuilder) newSearchableString(data []byte, byteSections []Docume return &dest, runeSecs, nil } -// IndexBuilder builds a single index shard. -type IndexBuilder struct { +// ShardBuilder builds a single index shard. +type ShardBuilder struct { // The version we will write to disk. Sourcegraph Specific. This is to // enable feature flagging new format versions. indexFormatVersion int @@ -194,7 +305,7 @@ type IndexBuilder struct { namePostings *postingsBuilder // root repositories - repoList []Repository + repoList []zoekt.Repository // name to index. subRepoIndices []map[string]uint32 @@ -205,6 +316,8 @@ type IndexBuilder struct { // language codes, uint16 encoded as little-endian languages []uint8 + categories []byte + // IndexTime will be used as the time if non-zero. Otherwise // time.Now(). This is useful for doing reproducible builds in tests. 
IndexTime time.Time @@ -213,8 +326,8 @@ type IndexBuilder struct { ID string } -func (d *Repository) verify() error { - for _, t := range []string{d.FileURLTemplate, d.LineFragmentTemplate, d.CommitURLTemplate} { +func verify(repo *zoekt.Repository) error { + for _, t := range []string{repo.FileURLTemplate, repo.LineFragmentTemplate, repo.CommitURLTemplate} { if _, err := ParseTemplate(t); err != nil { return err } @@ -231,7 +344,7 @@ func urlJoinPath(base string, elem ...string) string { // // [1]: https://sourcegraph.com/github.com/golang/go@go1.23.2/-/blob/src/html/template/html.go?L71-80 // [2]: https://github.com/apple/swift-system/blob/main/Sources/System/Util+StringArray.swift - elem = append([]string{}, elem...) // copy to mutate + elem = slices.Clone(elem) // copy to mutate for i := range elem { elem[i] = strings.ReplaceAll(elem[i], "+", "%2B") } @@ -253,24 +366,24 @@ func ParseTemplate(text string) (*template.Template, error) { } // ContentSize returns the number of content bytes so far ingested. -func (b *IndexBuilder) ContentSize() uint32 { +func (b *ShardBuilder) ContentSize() uint32 { // Add the name too so we don't skip building index if we have // lots of empty files. return b.contentPostings.endByte + b.namePostings.endByte } // NumFiles returns the number of files added to this builder -func (b *IndexBuilder) NumFiles() int { +func (b *ShardBuilder) NumFiles() int { return len(b.contentStrings) } -// NewIndexBuilder creates a fresh IndexBuilder. The passed in +// NewShardBuilder creates a fresh ShardBuilder. The passed in // Repository contains repo metadata, and may be set to nil. 
-func NewIndexBuilder(r *Repository) (*IndexBuilder, error) { - b := newIndexBuilder() +func NewShardBuilder(r *zoekt.Repository) (*ShardBuilder, error) { + b := newShardBuilder(0) if r == nil { - r = &Repository{} + r = &zoekt.Repository{} } if err := b.setRepository(r); err != nil { return nil, err @@ -278,13 +391,27 @@ func NewIndexBuilder(r *Repository) (*IndexBuilder, error) { return b, nil } -func newIndexBuilder() *IndexBuilder { - return &IndexBuilder{ +const defaultShardMax = 100 << 20 // 100 MB, matches Options.ShardMax default + +// newShardBuilder creates a ShardBuilder with fresh postingsBuilders. +// shardMax is the maximum shard content size in bytes (0 uses defaultShardMax). +func newShardBuilder(shardMax int) *ShardBuilder { + if shardMax <= 0 { + shardMax = defaultShardMax + } + return newShardBuilderWithPostings( + newPostingsBuilder(shardMax), + newPostingsBuilder(shardMax), + ) +} + +func newShardBuilderWithPostings(content, name *postingsBuilder) *ShardBuilder { + return &ShardBuilder{ indexFormatVersion: IndexFormatVersion, featureVersion: FeatureVersion, - contentPostings: newPostingsBuilder(), - namePostings: newPostingsBuilder(), + contentPostings: content, + namePostings: name, fileEndSymbol: []uint32{0}, symIndex: make(map[string]uint32), symKindIndex: make(map[string]uint32), @@ -292,8 +419,8 @@ func newIndexBuilder() *IndexBuilder { } } -func (b *IndexBuilder) setRepository(desc *Repository) error { - if err := desc.verify(); err != nil { +func (b *ShardBuilder) setRepository(desc *zoekt.Repository) error { + if err := verify(desc); err != nil { return err } @@ -304,7 +431,7 @@ func (b *IndexBuilder) setRepository(desc *Repository) error { repo := *desc // copy subrepomap without root - repo.SubRepoMap = map[string]*Repository{} + repo.SubRepoMap = map[string]*zoekt.Repository{} for k, v := range desc.SubRepoMap { if k != "" { repo.SubRepoMap[k] = v @@ -316,30 +443,9 @@ func (b *IndexBuilder) setRepository(desc *Repository) error { 
return b.populateSubRepoIndices() } -type DocumentSection struct { - Start, End uint32 -} - -// Document holds a document (file) to index. -type Document struct { - Name string - Content []byte - Branches []string - SubRepositoryPath string - Language string - - // If set, something is wrong with the file contents, and this - // is the reason it wasn't indexed. - SkipReason string - - // Document sections for symbols. Offsets should use bytes. - Symbols []DocumentSection - SymbolsMetaData []*Symbol -} - type symbolSlice struct { symbols []DocumentSection - metaData []*Symbol + metaData []*zoekt.Symbol } func (s symbolSlice) Len() int { return len(s.symbols) } @@ -354,11 +460,11 @@ func (s symbolSlice) Less(i, j int) bool { } // AddFile is a convenience wrapper for Add -func (b *IndexBuilder) AddFile(name string, content []byte) error { +func (b *ShardBuilder) AddFile(name string, content []byte) error { return b.Add(Document{Name: name, Content: content}) } -func (b *IndexBuilder) populateSubRepoIndices() error { +func (b *ShardBuilder) populateSubRepoIndices() error { if len(b.subRepoIndices) == len(b.repoList) { return nil } @@ -370,7 +476,7 @@ func (b *IndexBuilder) populateSubRepoIndices() error { return nil } -func mkSubRepoIndices(repo Repository) map[string]uint32 { +func mkSubRepoIndices(repo zoekt.Repository) map[string]uint32 { paths := []string{""} for k := range repo.SubRepoMap { paths = append(paths, k) @@ -385,7 +491,7 @@ func mkSubRepoIndices(repo Repository) map[string]uint32 { const notIndexedMarker = "NOT-INDEXED: " -func (b *IndexBuilder) symbolID(sym string) uint32 { +func (b *ShardBuilder) symbolID(sym string) uint32 { if _, ok := b.symIndex[sym]; !ok { b.symIndex[sym] = b.symID b.symID++ @@ -393,7 +499,7 @@ func (b *IndexBuilder) symbolID(sym string) uint32 { return b.symIndex[sym] } -func (b *IndexBuilder) symbolKindID(t string) uint32 { +func (b *ShardBuilder) symbolKindID(t string) uint32 { if _, ok := b.symKindIndex[t]; !ok { 
b.symKindIndex[t] = b.symKindID b.symKindID++ @@ -401,7 +507,7 @@ func (b *IndexBuilder) symbolKindID(t string) uint32 { return b.symKindIndex[t] } -func (b *IndexBuilder) addSymbols(symbols []*Symbol) { +func (b *ShardBuilder) addSymbols(symbols []*zoekt.Symbol) { for _, sym := range symbols { b.symMetaData = append(b.symMetaData, // This field was removed due to redundancy. To avoid @@ -416,30 +522,45 @@ func (b *IndexBuilder) addSymbols(symbols []*Symbol) { } func DetermineLanguageIfUnknown(doc *Document) { - if doc.Language == "" { - doc.Language = languages.GetLanguage(doc.Name, doc.Content) + if doc.Language != "" { + return + } + + // If this document has been skipped (doc.SkipReason != SkipReasonNone), it's + // likely very large, or it's a non-code file like binary. In this case, we just + // guess the language based on the file name to avoid examining the contents. + // Note: passing nil content is allowed by the go-enry contract (the underlying + // library we use here). + var content []byte + if doc.SkipReason == SkipReasonNone { + content = doc.Content + } + langs := languages.GetLanguagesFromContent(doc.Name, content) + if len(langs) > 0 { + doc.Language = langs[0] } } // Add a file which only occurs in certain branches. -func (b *IndexBuilder) Add(doc Document) error { - hasher := crc64.New(crc64.MakeTable(crc64.ISO)) - - if idx := bytes.IndexByte(doc.Content, 0); idx >= 0 { - doc.SkipReason = fmt.Sprintf("binary content at byte offset %d", idx) - doc.Language = "binary" +func (b *ShardBuilder) Add(doc Document) error { + // Skip binary check if already computed (e.g., by Builder.Add + // which calls DocChecker.Check before docs reach buildShard). 
+ if doc.Category == FileCategoryMissing { + if index := bytes.IndexByte(doc.Content, 0); index > 0 { + doc.SkipReason = SkipReasonBinary + } } - if doc.SkipReason != "" { - doc.Content = []byte(notIndexedMarker + doc.SkipReason) + if doc.SkipReason != SkipReasonNone { + doc.Content = []byte(notIndexedMarker + doc.SkipReason.explanation()) doc.Symbols = nil doc.SymbolsMetaData = nil - if doc.Language == "" { - doc.Language = "skipped" - } } DetermineLanguageIfUnknown(&doc) + if doc.Category == FileCategoryMissing { + DetermineFileCategory(&doc) + } sort.Sort(symbolSlice{doc.Symbols, doc.SymbolsMetaData}) var last DocumentSection @@ -493,6 +614,7 @@ func (b *IndexBuilder) Add(doc Document) error { b.subRepos = append(b.subRepos, subRepoIdx) b.repos = append(b.repos, uint16(repoIdx)) + hasher := crc64.New(crc64.MakeTable(crc64.ISO)) hasher.Write(doc.Content) b.contentStrings = append(b.contentStrings, docStr) @@ -514,10 +636,16 @@ func (b *IndexBuilder) Add(doc Document) error { } b.languages = append(b.languages, uint8(langCode), uint8(langCode>>8)) + category, err := doc.Category.encode() + if err != nil { + return err + } + b.categories = append(b.categories, category) + return nil } -func (b *IndexBuilder) branchMask(br string) uint64 { +func (b *ShardBuilder) branchMask(br string) uint64 { for i, b := range b.repoList[len(b.repoList)-1].Branches { if b.Name == br { return uint64(1) << uint(i) @@ -526,29 +654,46 @@ func (b *IndexBuilder) branchMask(br string) uint64 { return 0 } +// repoIDs returns a list of sourcegraph IDs for the indexed repos. If the ID +// is missing or there are no repos, this returns false. 
+func (b *ShardBuilder) repoIDs() ([]uint32, bool) { + if len(b.repoList) == 0 { + return nil, false + } + + ids := make([]uint32, 0, len(b.repoList)) + for _, repo := range b.repoList { + if repo.ID == 0 { + return nil, false + } + ids = append(ids, repo.ID) + } + return ids, true +} + type DocChecker struct { // A map to count the unique trigrams in a doc. Reused across docs to cut down on allocations. trigrams map[ngram]struct{} } // Check returns a reason why the given contents are probably not source texts. -func (t *DocChecker) Check(content []byte, maxTrigramCount int, allowLargeFile bool) error { +func (t *DocChecker) Check(content []byte, maxTrigramCount int, allowLargeFile bool) SkipReason { if len(content) == 0 { - return nil + return SkipReasonNone } if len(content) < ngramSize { - return fmt.Errorf("file size smaller than %d", ngramSize) + return SkipReasonTooSmall } if index := bytes.IndexByte(content, 0); index > 0 { - return fmt.Errorf("binary data at byte offset %d", index) + return SkipReasonBinary } // PERF: we only need to do the trigram check if the upperbound on content is greater than // our threshold. Also skip the trigram check if the file is explicitly marked as allowed. if trigramsUpperBound := len(content) - ngramSize + 1; trigramsUpperBound <= maxTrigramCount || allowLargeFile { - return nil + return SkipReasonNone } var cur [3]rune @@ -569,10 +714,10 @@ func (t *DocChecker) Check(content []byte, maxTrigramCount int, allowLargeFile b t.trigrams[runesToNGram(cur)] = struct{}{} if len(t.trigrams) > maxTrigramCount { // probably not text. 
- return fmt.Errorf("number of trigrams exceeds %d", maxTrigramCount) + return SkipReasonTooManyTrigrams } } - return nil + return SkipReasonNone } func (t *DocChecker) clearTrigrams(maxTrigramCount int) { @@ -584,9 +729,9 @@ func (t *DocChecker) clearTrigrams(maxTrigramCount int) { } } -// ShardName returns the name of the shard for the given prefix, version, and +// shardName returns the name of the shard for the given prefix, version, and // shard number. -func ShardName(indexDir string, prefix string, version, n int) string { +func shardName(indexDir string, prefix string, version, n int) string { prefix = url.QueryEscape(prefix) if len(prefix) > 200 { prefix = prefix[:200] + hashString(prefix)[:8] diff --git a/index/shard_builder_test.go b/index/shard_builder_test.go new file mode 100644 index 000000000..2a0026721 --- /dev/null +++ b/index/shard_builder_test.go @@ -0,0 +1,95 @@ +package index + +import ( + "strings" + "testing" +) + +func TestShardName(t *testing.T) { + tests := []struct { + name string + indexDir string + prefix string + version int + shardNum int + expected string + }{ + { + name: "short prefix", + indexDir: "index", + prefix: "short", + version: 1, + shardNum: 42, + expected: "index/short_v1.00042.zoekt", + }, + { + name: "long prefix truncated", + indexDir: "index", + prefix: strings.Repeat("a", 300), + version: 2, + shardNum: 1, + expected: "index/" + strings.Repeat("a", 200) + "003ef1ba" + "_v2.00001.zoekt", + }, + { + name: "empty indexDir", + prefix: "short", + version: 1, + expected: "short_v1.00000.zoekt", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + actual := shardName(test.indexDir, test.prefix, test.version, test.shardNum) + if actual != test.expected { + t.Errorf("expected %q, got %q", test.expected, actual) + } + }) + } +} + +func TestDetermineLanguageIfUnknown(t *testing.T) { + tests := []struct { + name string + doc Document + wantLang string + skipContent bool + }{ + { + name: "already 
has language", + doc: Document{ + Name: "test.java", + Language: "Go", + Content: []byte("package main"), + }, + wantLang: "Go", + }, + { + name: "skipped file", + doc: Document{ + Name: "large.js", + SkipReason: SkipReasonTooLarge, + Content: []byte(notIndexedMarker + "too large"), + }, + wantLang: "JavaScript", + }, + { + name: "skipped file with unknown extension", + doc: Document{ + Name: "deadb33f", + SkipReason: SkipReasonBinary, + Content: []byte(notIndexedMarker + "binary"), + }, + wantLang: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + DetermineLanguageIfUnknown(&tt.doc) + if tt.doc.Language != tt.wantLang { + t.Errorf("DetermineLanguageIfUnknown() got language = %v, want %v", tt.doc.Language, tt.wantLang) + } + }) + } +} diff --git a/toc.go b/index/toc.go similarity index 97% rename from toc.go rename to index/toc.go index 8eee56950..f42887939 100644 --- a/toc.go +++ b/index/toc.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package zoekt +package index // IndexFormatVersion is a version number. It is increased every time the // on-disk index format is changed. 
@@ -78,6 +78,7 @@ type indexTOC struct { runeOffsets simpleSection fileEndRunes simpleSection languages simpleSection + categories simpleSection fileEndSymbol simpleSection symbolMap lazyCompoundSection @@ -96,7 +97,8 @@ type indexTOC struct { contentChecksums simpleSection runeDocSections simpleSection - repos simpleSection + repos simpleSection + reposIDsBitmap simpleSection ranks simpleSection } @@ -179,8 +181,10 @@ func (t *indexTOC) sectionsTaggedList() []taggedSection { {"nameEndRunes", &t.nameEndRunes}, {"contentChecksums", &t.contentChecksums}, {"languages", &t.languages}, + {"categories", &t.categories}, {"runeDocSections", &t.runeDocSections}, {"repos", &t.repos}, + {"reposIDsBitmap", &t.reposIDsBitmap}, // We no longer write these sections, but we still return them here to avoid // warnings about unknown sections. diff --git a/tombstones.go b/index/tombstones.go similarity index 91% rename from tombstones.go rename to index/tombstones.go index a44eb37b1..e1ea8eb77 100644 --- a/tombstones.go +++ b/index/tombstones.go @@ -1,13 +1,15 @@ -package zoekt +package index import ( "encoding/json" "fmt" "os" "path/filepath" + + "github.com/sourcegraph/zoekt" ) -var mockRepos []*Repository +var mockRepos []*zoekt.Repository // SetTombstone idempotently sets a tombstone for repoName in .meta. func SetTombstone(shardPath string, repoID uint32) error { @@ -20,7 +22,7 @@ func UnsetTombstone(shardPath string, repoID uint32) error { } func setTombstone(shardPath string, repoID uint32, tombstone bool) error { - var repos []*Repository + var repos []*zoekt.Repository var err error if mockRepos != nil { @@ -58,7 +60,7 @@ func setTombstone(shardPath string, repoID uint32, tombstone bool) error { // The caller is responsible for renaming the temporary file to the final file path, or removing // the temporary file if it is no longer needed. // TODO: Should we stick this in a util package? 
-func JsonMarshalRepoMetaTemp(shardPath string, repositoryMetadata interface{}) (tempPath, finalPath string, err error) { +func JsonMarshalRepoMetaTemp(shardPath string, repositoryMetadata any) (tempPath, finalPath string, err error) { finalPath = shardPath + ".meta" b, err := json.Marshal(repositoryMetadata) @@ -90,6 +92,3 @@ func JsonMarshalRepoMetaTemp(shardPath string, repositoryMetadata interface{}) ( return f.Name(), finalPath, nil } - -// umask holds the Umask of the current process -var umask os.FileMode diff --git a/tombstones_test.go b/index/tombstones_test.go similarity index 80% rename from tombstones_test.go rename to index/tombstones_test.go index 7a39cfff7..30120b85b 100644 --- a/tombstones_test.go +++ b/index/tombstones_test.go @@ -1,10 +1,12 @@ -package zoekt +package index import ( "encoding/json" "os" "path/filepath" "testing" + + "github.com/sourcegraph/zoekt" ) func TestSetTombstone(t *testing.T) { @@ -24,7 +26,7 @@ func TestSetTombstone(t *testing.T) { isAlive := func(alive []bool) { t.Helper() blob := readMeta(ghostShard) - ghostRepos := []*Repository{} + ghostRepos := []*zoekt.Repository{} if err := json.Unmarshal(blob, &ghostRepos); err != nil { t.Fatal(err) } @@ -51,10 +53,10 @@ func TestSetTombstone(t *testing.T) { isAlive([]bool{false, true, true}) } -func mkRepos(repoNames ...string) []*Repository { - ret := make([]*Repository, 0, len(repoNames)) +func mkRepos(repoNames ...string) []*zoekt.Repository { + ret := make([]*zoekt.Repository, 0, len(repoNames)) for i, n := range repoNames { - ret = append(ret, &Repository{ID: uint32(i + 1), Name: n}) + ret = append(ret, &zoekt.Repository{ID: uint32(i + 1), Name: n}) } return ret } diff --git a/write.go b/index/write.go similarity index 79% rename from write.go rename to index/write.go index 278ebc025..6c37e6107 100644 --- a/write.go +++ b/index/write.go @@ -12,17 +12,23 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package zoekt +package index import ( "bufio" "bytes" + "cmp" "encoding/binary" "encoding/json" "fmt" "io" + "slices" "sort" "time" + + "github.com/sourcegraph/zoekt" + + "github.com/RoaringBitmap/roaring" ) func (w *writer) writeTOC(toc *indexTOC) { @@ -66,26 +72,46 @@ func (s *compoundSection) writeMap(w *writer, m map[string]uint32) { s.writeStrings(w, keys) } +func writeUint32Bitmap(w *writer, dat []uint32) { + rb := roaring.BitmapOf(dat...) + rb.RunOptimize() + rb.WriteTo(w) +} + func writePostings(w *writer, s *postingsBuilder, ngramText *simpleSection, charOffsets *simpleSection, postings *compoundSection, endRunes *simpleSection, ) { - keys := make(ngramSlice, 0, len(s.postings)) - for k := range s.postings { - keys = append(keys, k) + // Collect ngrams from both the ASCII direct-indexed array and the + // non-ASCII map, then sort by ngram value. + type ngramPosting struct { + ng ngram + pl *postingList + } + all := make([]ngramPosting, 0, len(s.asciiPopulated)+len(s.postings)) + for _, idx := range s.asciiPopulated { + pl := s.asciiPostings[idx] + if len(pl.data) > 0 { + all = append(all, ngramPosting{asciiIndexToNgram(idx), pl}) + } } - sort.Sort(keys) + for k, pl := range s.postings { + if len(pl.data) > 0 { + all = append(all, ngramPosting{k, pl}) + } + } + slices.SortFunc(all, func(a, b ngramPosting) int { return cmp.Compare(a.ng, b.ng) }) ngramText.start(w) - for _, k := range keys { + for _, np := range all { var buf [8]byte - binary.BigEndian.PutUint64(buf[:], uint64(k)) + binary.BigEndian.PutUint64(buf[:], uint64(np.ng)) w.Write(buf[:]) } ngramText.end(w) postings.start(w) - for _, k := range keys { - postings.addItem(w, s.postings[k]) + for _, np := range all { + postings.addItem(w, np.pl.data) } postings.end(w) @@ -98,7 +124,7 @@ func writePostings(w *writer, s *postingsBuilder, ngramText *simpleSection, endRunes.end(w) } -func (b *IndexBuilder) Write(out io.Writer) error { +func (b *ShardBuilder) Write(out io.Writer) error { next := 
b.indexFormatVersion == NextIndexFormatVersion buffered := bufio.NewWriterSize(out, 1<<20) @@ -159,6 +185,10 @@ func (b *IndexBuilder) Write(out io.Writer) error { w.Write(b.languages) toc.languages.end(w) + toc.categories.start(w) + w.Write(b.categories) + toc.categories.end(w) + toc.runeDocSections.start(w) w.Write(marshalDocSections(b.runeDocSections)) toc.runeDocSections.end(w) @@ -169,12 +199,18 @@ func (b *IndexBuilder) Write(out io.Writer) error { toc.repos.end(w) } + if repoIDs, ok := b.repoIDs(); ok && next { + toc.reposIDsBitmap.start(w) + writeUint32Bitmap(w, repoIDs) + toc.reposIDsBitmap.end(w) + } + indexTime := b.IndexTime if indexTime.IsZero() { indexTime = time.Now().UTC() } - if err := b.writeJSON(&IndexMetadata{ + if err := b.writeJSON(&zoekt.IndexMetadata{ IndexFormatVersion: b.indexFormatVersion, IndexTime: indexTime, IndexFeatureVersion: b.featureVersion, @@ -209,7 +245,7 @@ func (b *IndexBuilder) Write(out io.Writer) error { return w.err } -func (b *IndexBuilder) writeJSON(data interface{}, sec *simpleSection, w *writer) error { +func (b *ShardBuilder) writeJSON(data any, sec *simpleSection, w *writer) error { blob, err := json.Marshal(data) if err != nil { return err diff --git a/indexbuilder_test.go b/indexbuilder_test.go deleted file mode 100644 index 7487a5ad3..000000000 --- a/indexbuilder_test.go +++ /dev/null @@ -1,49 +0,0 @@ -package zoekt - -import ( - "strings" - "testing" -) - -func TestShardName(t *testing.T) { - tests := []struct { - name string - indexDir string - prefix string - version int - shardNum int - expected string - }{ - { - name: "short prefix", - indexDir: "index", - prefix: "short", - version: 1, - shardNum: 42, - expected: "index/short_v1.00042.zoekt", - }, - { - name: "long prefix truncated", - indexDir: "index", - prefix: strings.Repeat("a", 300), - version: 2, - shardNum: 1, - expected: "index/" + strings.Repeat("a", 200) + "003ef1ba" + "_v2.00001.zoekt", - }, - { - name: "empty indexDir", - prefix: "short", - 
version: 1, - expected: "short_v1.00000.zoekt", - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - actual := ShardName(test.indexDir, test.prefix, test.version, test.shardNum) - if actual != test.expected { - t.Errorf("expected %q, got %q", test.expected, actual) - } - }) - } -} diff --git a/indexfile_other.go b/indexfile_other.go deleted file mode 100644 index f192d9beb..000000000 --- a/indexfile_other.go +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2016 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !linux && !darwin -// +build !linux,!darwin - -package zoekt - -import ( - "fmt" - "os" -) - -// NewIndexFile returns a new index file. The index file takes -// ownership of the passed in file, and may close it. 
-func NewIndexFile(f *os.File) (IndexFile, error) { - return &indexFileFromOS{f}, nil -} - -type indexFileFromOS struct { - f *os.File -} - -func (f *indexFileFromOS) Read(off, sz uint32) ([]byte, error) { - r := make([]byte, sz) - _, err := f.f.ReadAt(r, int64(off)) - return r, err -} - -func (f indexFileFromOS) Size() (uint32, error) { - fi, err := f.f.Stat() - if err != nil { - return 0, err - } - - sz := fi.Size() - - if sz >= maxUInt32 { - return 0, fmt.Errorf("overflow") - } - - return uint32(sz), nil -} - -func (f indexFileFromOS) Close() { - f.f.Close() -} - -func (f indexFileFromOS) Name() string { - return f.f.Name() -} diff --git a/install-ctags-alpine.sh b/install-ctags-alpine.sh index c94968733..ae0cfcc93 100755 --- a/install-ctags-alpine.sh +++ b/install-ctags-alpine.sh @@ -9,10 +9,14 @@ CTAGS_ARCHIVE_TOP_LEVEL_DIR=ctags-6.1.0 # When using commits you can rely on # CTAGS_ARCHIVE_TOP_LEVEL_DIR=ctags-$CTAGS_VERSION +CTAGS_TMPDIR= + cleanup() { apk --no-cache --purge del ctags-build-deps || true cd / - rm -rf /tmp/ctags-$CTAGS_VERSION + if [ -n "$CTAGS_TMPDIR" ]; then + rm -rf "$CTAGS_TMPDIR" + fi } trap cleanup EXIT @@ -35,11 +39,12 @@ apk --no-cache add \ apk --no-cache add jansson NUMCPUS=$(grep -c '^processor' /proc/cpuinfo) +CTAGS_TMPDIR=$(mktemp -d /tmp/ctags.XXXXXX) # Installation -curl --retry 5 "https://codeload.github.com/universal-ctags/ctags/tar.gz/$CTAGS_VERSION" | tar xz -C /tmp -cd /tmp/$CTAGS_ARCHIVE_TOP_LEVEL_DIR +curl --retry 5 "https://codeload.github.com/universal-ctags/ctags/tar.gz/$CTAGS_VERSION" | tar xz -C "$CTAGS_TMPDIR" +cd "$CTAGS_TMPDIR/$CTAGS_ARCHIVE_TOP_LEVEL_DIR" ./autogen.sh -./configure --program-prefix=universal- --enable-json +./configure --program-prefix=universal- --enable-json --disable-readcmd make -j"$NUMCPUS" --load-average="$NUMCPUS" make install diff --git a/internal/archive/e2e_test.go b/internal/archive/e2e_test.go index b861eb09a..cff643954 100644 --- a/internal/archive/e2e_test.go +++ 
b/internal/archive/e2e_test.go @@ -19,9 +19,9 @@ import ( "github.com/stretchr/testify/require" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/query" - "github.com/sourcegraph/zoekt/shards" + "github.com/sourcegraph/zoekt/search" ) func TestMain(m *testing.M) { @@ -118,7 +118,7 @@ func testIndexIncrementally(t *testing.T, format string) { fileSize := 1000 files := map[string]string{} - for i := 0; i < 4; i++ { + for i := range 4 { s := fmt.Sprintf("%d", i) files["F"+s] = strings.Repeat("a", fileSize) files["!F"+s] = strings.Repeat("a", fileSize) @@ -161,7 +161,7 @@ func testIndexIncrementally(t *testing.T, format string) { for _, test := range tests { largeFiles, wantNumFiles := test.largeFiles, test.wantNumFiles - bopts := build.Options{ + bopts := index.Options{ SizeMax: fileSize - 1, IndexDir: indexDir, LargeFiles: largeFiles, @@ -179,7 +179,7 @@ func testIndexIncrementally(t *testing.T, format string) { t.Fatalf("error creating index: %v", err) } - ss, err := shards.NewDirectorySearcher(indexDir) + ss, err := search.NewDirectorySearcher(indexDir) if err != nil { t.Fatalf("NewDirectorySearcher(%s): %v", indexDir, err) } @@ -220,7 +220,7 @@ func testLatestCommitDate(t *testing.T, format string) { fileSize := 10 files := map[string]string{} - for i := 0; i < 4; i++ { + for i := range 4 { s := fmt.Sprintf("%d", i) files["F"+s] = strings.Repeat("a", fileSize) files["!F"+s] = strings.Repeat("a", fileSize) @@ -234,7 +234,7 @@ func testLatestCommitDate(t *testing.T, format string) { // Index indexDir := t.TempDir() - bopts := build.Options{ + bopts := index.Options{ IndexDir: indexDir, } opts := Options{ @@ -254,7 +254,7 @@ func testLatestCommitDate(t *testing.T, format string) { indexFiles, err := f.Readdirnames(1) require.Len(t, indexFiles, 1) - repos, _, err := zoekt.ReadMetadataPath(filepath.Join(indexDir, indexFiles[0])) + repos, _, err := 
index.ReadMetadataPath(filepath.Join(indexDir, indexFiles[0])) require.NoError(t, err) require.Len(t, repos, 1) require.True(t, repos[0].LatestCommitDate.Equal(modTime)) diff --git a/internal/archive/index.go b/internal/archive/index.go index c8836768f..b940d6942 100644 --- a/internal/archive/index.go +++ b/internal/archive/index.go @@ -10,7 +10,7 @@ import ( "sync" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" ) // Options specify the archive specific indexing options. @@ -74,7 +74,7 @@ func (o *Options) SetDefaults() { } // Index archive specified in opts using bopts. -func Index(opts Options, bopts build.Options) error { +func Index(opts Options, bopts index.Options) error { opts.SetDefaults() if opts.Name == "" && opts.RepoURL == "" { @@ -114,7 +114,7 @@ func Index(opts Options, bopts build.Options) error { defer a.Close() bopts.RepositoryDescription.Source = opts.Archive - var builder *build.Builder + var builder *index.Builder once := sync.Once{} var onceErr error @@ -124,7 +124,7 @@ func Index(opts Options, bopts build.Options) error { once.Do(func() { // We use the ModTime of the first file as a proxy for the latest commit date. 
bopts.RepositoryDescription.LatestCommitDate = f.ModTime - builder, onceErr = build.NewBuilder(bopts) + builder, onceErr = index.NewBuilder(bopts) }) if onceErr != nil { return onceErr @@ -140,7 +140,7 @@ func Index(opts Options, bopts build.Options) error { return nil } - return builder.Add(zoekt.Document{ + return builder.Add(index.Document{ Name: name, Content: contents, Branches: brs, diff --git a/ctags/parser.go b/internal/ctags/parser.go similarity index 100% rename from ctags/parser.go rename to internal/ctags/parser.go diff --git a/ctags/parser_bins.go b/internal/ctags/parser_bins.go similarity index 100% rename from ctags/parser_bins.go rename to internal/ctags/parser_bins.go diff --git a/ctags/parser_test.go b/internal/ctags/parser_test.go similarity index 100% rename from ctags/parser_test.go rename to internal/ctags/parser_test.go diff --git a/ctags/symbol_kind.go b/internal/ctags/symbol_kind.go similarity index 100% rename from ctags/symbol_kind.go rename to internal/ctags/symbol_kind.go diff --git a/debugserver/debug.go b/internal/debugserver/debug.go similarity index 96% rename from debugserver/debug.go rename to internal/debugserver/debug.go index 55792d8d8..9955e0746 100644 --- a/debugserver/debug.go +++ b/internal/debugserver/debug.go @@ -11,7 +11,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promhttp" "golang.org/x/net/trace" - "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" ) var registerOnce sync.Once @@ -87,5 +87,5 @@ func AddHandlers(mux *http.ServeMux, enablePprof bool, p ...DebugPage) { func register() { promauto.NewGaugeVec(prometheus.GaugeOpts{ Name: "zoekt_version", - }, []string{"version"}).WithLabelValues(zoekt.Version).Set(1) + }, []string{"version"}).WithLabelValues(index.Version).Set(1) } diff --git a/debugserver/expvar.go b/internal/debugserver/expvar.go similarity index 100% rename from debugserver/expvar.go rename to internal/debugserver/expvar.go diff --git a/build/e2e_test.go 
b/internal/e2e/e2e_index_test.go similarity index 74% rename from build/e2e_test.go rename to internal/e2e/e2e_index_test.go index e899b3b73..453263385 100644 --- a/build/e2e_test.go +++ b/internal/e2e/e2e_index_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package build +package e2e import ( "bytes" @@ -22,7 +22,6 @@ import ( "log" "os" "path/filepath" - "reflect" "runtime" "sort" "strconv" @@ -36,16 +35,17 @@ import ( "github.com/stretchr/testify/require" "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/internal/tenant" "github.com/sourcegraph/zoekt/internal/tenant/tenanttest" "github.com/sourcegraph/zoekt/query" - "github.com/sourcegraph/zoekt/shards" + "github.com/sourcegraph/zoekt/search" ) -func TestBasic(t *testing.T) { +func TestBasicIndexing(t *testing.T) { dir := t.TempDir() - opts := Options{ + opts := index.Options{ IndexDir: dir, ShardMax: 1024, RepositoryDescription: zoekt.Repository{ @@ -55,12 +55,12 @@ func TestBasic(t *testing.T) { SizeMax: 1 << 20, } - b, err := NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } - for i := 0; i < 4; i++ { + for i := range 4 { s := fmt.Sprintf("%d", i) if err := b.AddFile("F"+s, []byte(strings.Repeat(s, 1000))); err != nil { t.Fatal(err) @@ -76,12 +76,12 @@ func TestBasic(t *testing.T) { t.Fatalf("want multiple shards, got %v", fs) } - _, md0, err := zoekt.ReadMetadataPath(fs[0]) + _, md0, err := index.ReadMetadataPath(fs[0]) if err != nil { t.Fatal(err) } for _, f := range fs[1:] { - _, md, err := zoekt.ReadMetadataPath(f) + _, md, err := index.ReadMetadataPath(f) if err != nil { t.Fatal(err) } @@ -93,7 +93,7 @@ func TestBasic(t *testing.T) { } } - ss, err := shards.NewDirectorySearcher(dir) + ss, err := search.NewDirectorySearcher(dir) if err != nil { t.Fatalf("NewDirectorySearcher(%s): %v", dir, err) } @@ -122,12 +122,12 @@ 
func TestBasic(t *testing.T) { t.Run("meta file", func(t *testing.T) { // use retryTest to allow for the directory watcher to notice the meta // file - retryTest(t, func(fatalf func(format string, args ...interface{})) { + retryTest(t, func(fatalf func(format string, args ...any)) { // Add a .meta file for each shard with repo.Name set to // "repo-mutated". We do this inside retry helper since we have noticed // some flakiness on github CI. for _, p := range fs { - repos, _, err := zoekt.ReadMetadataPath(p) + repos, _, err := index.ReadMetadataPath(p) if err != nil { t.Fatal(err) } @@ -167,7 +167,7 @@ func TestSearchTenant(t *testing.T) { tnt1, err := tenant.FromContext(ctx1) require.NoError(t, err) - opts := Options{ + opts := index.Options{ IndexDir: dir, ShardMax: 1024, RepositoryDescription: zoekt.Repository{ @@ -178,12 +178,12 @@ func TestSearchTenant(t *testing.T) { SizeMax: 1 << 20, } - b, err := NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } - for i := 0; i < 4; i++ { + for i := range 4 { s := fmt.Sprintf("%d", i) if err := b.AddFile("F"+s, []byte(strings.Repeat(s, 1000))); err != nil { t.Fatal(err) @@ -199,12 +199,12 @@ func TestSearchTenant(t *testing.T) { t.Fatalf("want multiple shards, got %v", fs) } - _, md0, err := zoekt.ReadMetadataPath(fs[0]) + _, md0, err := index.ReadMetadataPath(fs[0]) if err != nil { t.Fatal(err) } for _, f := range fs[1:] { - _, md, err := zoekt.ReadMetadataPath(f) + _, md, err := index.ReadMetadataPath(f) if err != nil { t.Fatal(err) } @@ -216,7 +216,7 @@ func TestSearchTenant(t *testing.T) { } } - ss, err := shards.NewDirectorySearcher(dir) + ss, err := search.NewDirectorySearcher(dir) if err != nil { t.Fatalf("NewDirectorySearcher(%s): %v", dir, err) } @@ -250,7 +250,7 @@ func TestListTenant(t *testing.T) { tnt1, err := tenant.FromContext(ctx1) require.NoError(t, err) - opts := Options{ + opts := index.Options{ IndexDir: dir, RepositoryDescription: zoekt.Repository{ 
Name: "repo", @@ -259,7 +259,7 @@ func TestListTenant(t *testing.T) { } opts.SetDefaults() - b, err := NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } @@ -272,7 +272,7 @@ func TestListTenant(t *testing.T) { t.Fatalf("want a shard, got %v", fs) } - ss, err := shards.NewDirectorySearcher(dir) + ss, err := search.NewDirectorySearcher(dir) if err != nil { t.Fatalf("NewDirectorySearcher(%s): %v", dir, err) } @@ -292,7 +292,7 @@ func TestListTenant(t *testing.T) { // retryTest will retry f until min(t.Deadline(), time.Minute). It returns // once f doesn't call fatalf. -func retryTest(t *testing.T, f func(fatalf func(format string, args ...interface{}))) { +func retryTest(t *testing.T, f func(fatalf func(format string, args ...any))) { t.Helper() sleep := 10 * time.Millisecond @@ -307,7 +307,7 @@ func retryTest(t *testing.T, f func(fatalf func(format string, args ...interface go func() { defer close(done) - f(func(format string, args ...interface{}) { + f(func(format string, args ...any) { runtime.Goexit() }) @@ -335,7 +335,7 @@ func TestLargeFileOption(t *testing.T) { dir := t.TempDir() sizeMax := 1000 - opts := Options{ + opts := index.Options{ IndexDir: dir, LargeFiles: []string{"F0", "F1", "F2", "!F1"}, RepositoryDescription: zoekt.Repository{ @@ -344,12 +344,12 @@ func TestLargeFileOption(t *testing.T) { SizeMax: sizeMax, } - b, err := NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } - for i := 0; i < 4; i++ { + for i := range 4 { s := fmt.Sprintf("%d", i) if err := b.AddFile("F"+s, []byte(strings.Repeat("a", sizeMax+1))); err != nil { t.Fatal(err) @@ -360,7 +360,7 @@ func TestLargeFileOption(t *testing.T) { t.Errorf("Finish: %v", err) } - ss, err := shards.NewDirectorySearcher(dir) + ss, err := search.NewDirectorySearcher(dir) if err != nil { t.Fatalf("NewDirectorySearcher(%s): %v", dir, err) } @@ -386,7 +386,7 @@ func TestLargeFileOption(t *testing.T) { func 
TestUpdate(t *testing.T) { dir := t.TempDir() - opts := Options{ + opts := index.Options{ IndexDir: dir, ShardMax: 1024, RepositoryDescription: zoekt.Repository{ @@ -397,7 +397,7 @@ func TestUpdate(t *testing.T) { SizeMax: 1 << 20, } - if b, err := NewBuilder(opts); err != nil { + if b, err := index.NewBuilder(opts); err != nil { t.Fatalf("NewBuilder: %v", err) } else { if err := b.AddFile("F", []byte("hoi")); err != nil { @@ -407,7 +407,7 @@ func TestUpdate(t *testing.T) { t.Errorf("Finish: %v", err) } } - ss, err := shards.NewDirectorySearcher(dir) + ss, err := search.NewDirectorySearcher(dir) if err != nil { t.Fatalf("NewDirectorySearcher(%s): %v", dir, err) } @@ -432,7 +432,7 @@ func TestUpdate(t *testing.T) { FileURLTemplate: "url2", } - if b, err := NewBuilder(opts); err != nil { + if b, err := index.NewBuilder(opts); err != nil { t.Fatalf("NewBuilder: %v", err) } else { if err := b.AddFile("F", []byte("hoi")); err != nil { @@ -479,7 +479,7 @@ func TestUpdate(t *testing.T) { func TestDeleteOldShards(t *testing.T) { dir := t.TempDir() - opts := Options{ + opts := index.Options{ IndexDir: dir, ShardMax: 1024, RepositoryDescription: zoekt.Repository{ @@ -490,11 +490,11 @@ func TestDeleteOldShards(t *testing.T) { } opts.SetDefaults() - b, err := NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } - for i := 0; i < 4; i++ { + for i := range 4 { s := fmt.Sprintf("%d\n", i) if err := b.AddFile("F"+s, []byte(strings.Repeat(s, 1024/2))); err != nil { t.Errorf("AddFile: %v", err) @@ -521,11 +521,11 @@ func TestDeleteOldShards(t *testing.T) { // Do again, without sharding. 
opts.ShardMax = 1 << 20 - b, err = NewBuilder(opts) + b, err = index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } - for i := 0; i < 4; i++ { + for i := range 4 { s := fmt.Sprintf("%d\n", i) if err := b.AddFile("F"+s, []byte(strings.Repeat(s, 1024/2))); err != nil { t.Fatal(err) @@ -543,7 +543,7 @@ func TestDeleteOldShards(t *testing.T) { } // Again, but don't index anything; should leave old shards intact. - b, err = NewBuilder(opts) + b, err = index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } @@ -559,141 +559,10 @@ func TestDeleteOldShards(t *testing.T) { } } -func TestPartialSuccess(t *testing.T) { - dir := t.TempDir() - - opts := Options{ - IndexDir: dir, - ShardMax: 1024, - SizeMax: 1 << 20, - Parallelism: 1, - } - opts.RepositoryDescription.Name = "repo" - opts.SetDefaults() - - b, err := NewBuilder(opts) - if err != nil { - t.Fatalf("NewBuilder: %v", err) - } - - for i := 0; i < 4; i++ { - nm := fmt.Sprintf("F%d", i) - _ = b.AddFile(nm, []byte(strings.Repeat("01234567\n", 128))) - } - b.buildError = fmt.Errorf("any error") - - // No error checking. - _ = b.Finish() - - // Finish cleans up temporary files. 
- if fs, err := filepath.Glob(dir + "/*"); err != nil { - t.Errorf("glob(%s): %v", dir, err) - } else if len(fs) != 0 { - t.Errorf("got shards %v, want []", fs) - } -} - -type filerankCase struct { - name string - docs []*zoekt.Document - want []int -} - -func testFileRankAspect(t *testing.T, c filerankCase) { - var want []*zoekt.Document - for _, j := range c.want { - want = append(want, c.docs[j]) - } - - got := make([]*zoekt.Document, len(c.docs)) - copy(got, c.docs) - sortDocuments(got) - - print := func(ds []*zoekt.Document) string { - r := "" - for _, d := range ds { - r += fmt.Sprintf("%v, ", d) - } - return r - } - if !reflect.DeepEqual(got, want) { - t.Errorf("got docs [%v], want [%v]", print(got), print(want)) - } -} - -func TestFileRank(t *testing.T) { - for _, c := range []filerankCase{{ - name: "filename", - docs: []*zoekt.Document{ - { - Name: "longlonglong", - Content: []byte("bla"), - }, - { - Name: "short", - Content: []byte("bla"), - }, - }, - want: []int{1, 0}, - }, { - name: "test", - docs: []*zoekt.Document{ - { - Name: "foo_test.go", - Content: []byte("bla"), - }, - { - Name: "longlonglong", - Content: []byte("bla"), - }, - }, - want: []int{1, 0}, - }, { - name: "content", - docs: []*zoekt.Document{ - { - Content: []byte("bla"), - }, - { - Content: []byte("blablablabla"), - }, - { - Content: []byte("blabla"), - }, - }, - want: []int{0, 2, 1}, - }, { - name: "skipped docs", - docs: []*zoekt.Document{ - { - Name: "binary_file", - SkipReason: "binary file", - }, - { - Name: "some_test.go", - Content: []byte("bla"), - }, - { - Name: "large_file.go", - SkipReason: "too large", - }, - { - Name: "file.go", - Content: []byte("blabla"), - }, - }, - want: []int{3, 1, 0, 2}, - }} { - t.Run(c.name, func(t *testing.T) { - testFileRankAspect(t, c) - }) - } -} - func TestEmptyContent(t *testing.T) { dir := t.TempDir() - opts := Options{ + opts := index.Options{ IndexDir: dir, RepositoryDescription: zoekt.Repository{ Name: "repo", @@ -701,7 +570,7 @@ func 
TestEmptyContent(t *testing.T) { } opts.SetDefaults() - b, err := NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } @@ -714,7 +583,7 @@ func TestEmptyContent(t *testing.T) { t.Fatalf("want a shard, got %v", fs) } - ss, err := shards.NewDirectorySearcher(dir) + ss, err := search.NewDirectorySearcher(dir) if err != nil { t.Fatalf("NewDirectorySearcher(%s): %v", dir, err) } @@ -735,21 +604,22 @@ func TestDeltaShards(t *testing.T) { // TODO: Need to write a test for compound shards as well. type step struct { name string - documents []zoekt.Document - optFn func(t *testing.T, o *Options) + documents []index.Document + optFn func(t *testing.T, o *index.Options) query string - expectedDocuments []zoekt.Document + changedFile string + expectedDocuments []index.Document } var ( - fooAtMain = zoekt.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v1")} - fooAtMainV2 = zoekt.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v2")} + fooAtMain = index.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v1")} + fooAtMainV2 = index.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v2")} - fooAtMainAndRelease = zoekt.Document{Name: "foo.go", Branches: []string{"main", "release"}, Content: []byte("common foo-main-and-release")} + fooAtMainAndRelease = index.Document{Name: "foo.go", Branches: []string{"main", "release"}, Content: []byte("common foo-main-and-release")} - barAtMain = zoekt.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main")} - barAtMainV2 = zoekt.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main-v2")} + barAtMain = index.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main")} + barAtMainV2 = index.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common 
bar-main-v2")} ) for _, test := range []struct { @@ -761,51 +631,50 @@ func TestDeltaShards(t *testing.T) { steps: []step{ { name: "setup", - documents: []zoekt.Document{barAtMain, fooAtMain}, + documents: []index.Document{barAtMain, fooAtMain}, query: "common", - expectedDocuments: []zoekt.Document{barAtMain, fooAtMain}, + expectedDocuments: []index.Document{barAtMain, fooAtMain}, }, { name: "add new version of foo, tombstone older ones", - documents: []zoekt.Document{fooAtMainV2}, - optFn: func(t *testing.T, o *Options) { + documents: []index.Document{fooAtMainV2}, + optFn: func(t *testing.T, o *index.Options) { o.IsDelta = true - o.changedOrRemovedFiles = []string{"foo.go"} }, query: "common", - expectedDocuments: []zoekt.Document{barAtMain, fooAtMainV2}, + changedFile: "foo.go", + expectedDocuments: []index.Document{barAtMain, fooAtMainV2}, }, { name: "add new version of bar, tombstone older ones", - documents: []zoekt.Document{barAtMainV2}, - optFn: func(t *testing.T, o *Options) { + documents: []index.Document{barAtMainV2}, + optFn: func(t *testing.T, o *index.Options) { o.IsDelta = true - o.changedOrRemovedFiles = []string{"bar.go"} }, query: "common", - expectedDocuments: []zoekt.Document{barAtMainV2, fooAtMainV2}, + changedFile: "bar.go", + expectedDocuments: []index.Document{barAtMainV2, fooAtMainV2}, }, - }, - }, + }}, { name: "tombstone older documents even if the latest shard has no documents", steps: []step{ { name: "setup", - documents: []zoekt.Document{barAtMain, fooAtMain}, + documents: []index.Document{barAtMain, fooAtMain}, query: "common", - expectedDocuments: []zoekt.Document{barAtMain, fooAtMain}, + expectedDocuments: []index.Document{barAtMain, fooAtMain}, }, { // a build with no documents could represent a deletion name: "tombstone older documents", documents: nil, - optFn: func(t *testing.T, o *Options) { + optFn: func(t *testing.T, o *index.Options) { o.IsDelta = true - o.changedOrRemovedFiles = []string{"foo.go"} }, query: "common", - 
expectedDocuments: []zoekt.Document{barAtMain}, + changedFile: "foo.go", + expectedDocuments: []index.Document{barAtMain}, }, }, }, @@ -814,19 +683,19 @@ func TestDeltaShards(t *testing.T) { steps: []step{ { name: "setup", - documents: []zoekt.Document{barAtMain, fooAtMainAndRelease}, + documents: []index.Document{barAtMain, fooAtMainAndRelease}, query: "common", - expectedDocuments: []zoekt.Document{barAtMain, fooAtMainAndRelease}, + expectedDocuments: []index.Document{barAtMain, fooAtMainAndRelease}, }, { name: "tombstone foo", documents: nil, - optFn: func(t *testing.T, o *Options) { + optFn: func(t *testing.T, o *index.Options) { o.IsDelta = true - o.changedOrRemovedFiles = []string{"foo.go"} }, query: "common", - expectedDocuments: []zoekt.Document{barAtMain}, + changedFile: "foo.go", + expectedDocuments: []index.Document{barAtMain}, }, }, }, @@ -857,7 +726,7 @@ func TestDeltaShards(t *testing.T) { return a.Name < b.Name }) - buildOpts := Options{ + buildOpts := index.Options{ IndexDir: indexDir, RepositoryDescription: repository, } @@ -867,7 +736,8 @@ func TestDeltaShards(t *testing.T) { step.optFn(t, &buildOpts) } - b, err := NewBuilder(buildOpts) + b, err := index.NewBuilder(buildOpts) + b.MarkFileAsChangedOrRemoved(step.changedFile) if err != nil { t.Fatalf("step %q: NewBuilder: %s", step.name, err) } @@ -880,7 +750,7 @@ func TestDeltaShards(t *testing.T) { } // Call b.Finish() multiple times to ensure that it is idempotent - for i := 0; i < 3; i++ { + for i := range 3 { err = b.Finish() if err != nil { @@ -894,11 +764,11 @@ func TestDeltaShards(t *testing.T) { } state, _ := buildOpts.IndexState() - if diff := cmp.Diff(IndexStateEqual, state); diff != "" { + if diff := cmp.Diff(index.IndexStateEqual, state); diff != "" { t.Errorf("unexpected diff in index state (-want +got):\n%s", diff) } - ss, err := shards.NewDirectorySearcher(indexDir) + ss, err := search.NewDirectorySearcher(indexDir) if err != nil { t.Fatalf("step %q: NewDirectorySearcher(%s): %s", 
step.name, indexDir, err) } @@ -912,17 +782,17 @@ func TestDeltaShards(t *testing.T) { t.Fatalf("step %q: Search(%q): %s", step.name, step.query, err) } - var receivedDocuments []zoekt.Document + var receivedDocuments []index.Document for _, f := range result.Files { - receivedDocuments = append(receivedDocuments, zoekt.Document{ + receivedDocuments = append(receivedDocuments, index.Document{ Name: f.FileName, Content: f.Content, }) } cmpOpts := []cmp.Option{ - cmpopts.IgnoreFields(zoekt.Document{}, "Branches"), - cmpopts.SortSlices(func(a, b zoekt.Document) bool { + cmpopts.IgnoreFields(index.Document{}, "Branches"), + cmpopts.SortSlices(func(a, b index.Document) bool { if a.Name < b.Name { return true } diff --git a/internal/e2e/e2e_rank_test.go b/internal/e2e/e2e_rank_test.go index d68715f21..007da722a 100644 --- a/internal/e2e/e2e_rank_test.go +++ b/internal/e2e/e2e_rank_test.go @@ -17,10 +17,10 @@ import ( "github.com/google/go-cmp/cmp" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/build" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/internal/archive" "github.com/sourcegraph/zoekt/query" - "github.com/sourcegraph/zoekt/shards" + "github.com/sourcegraph/zoekt/search" ) var update = flag.Bool("update", false, "update golden file") @@ -42,7 +42,7 @@ func TestRanking(t *testing.T) { archiveURLs := []string{ "https://github.com/sourcegraph/sourcegraph-public-snapshot/tree/v5.2.2", // Nov 1 2023 "https://github.com/golang/go/tree/go1.21.4", // Nov 7 2023 - "https://github.com/sourcegraph/cody/tree/vscode-v0.14.5", // Nov 8 2023 + "https://github.com/sourcegraph/cody-public-snapshot/tree/vscode-v0.14.5", // Nov 8 2023 // The commit before ranking e2e tests were added to avoid matching // content inside our golden files. 
"https://github.com/sourcegraph/zoekt/commit/ef907c2371176aa3f97713d5bf182983ef090c6a", // Nov 17 2023 @@ -65,8 +65,8 @@ func TestRanking(t *testing.T) { q("Repository metadata Write rbac", "github.com/sourcegraph/sourcegraph-public-snapshot/internal/rbac/constants.go"), // unsure if this is the best doc? // cody - q("generate unit test", "github.com/sourcegraph/cody/lib/shared/src/chat/recipes/generate-test.ts"), - q("r:cody sourcegraph url", "github.com/sourcegraph/cody/lib/shared/src/sourcegraph-api/graphql/client.ts"), + q("generate unit test", "github.com/sourcegraph/cody-public-snapshot/lib/shared/src/chat/recipes/generate-test.ts"), + q("r:cody sourcegraph url", "github.com/sourcegraph/cody-public-snapshot/lib/shared/src/sourcegraph-api/graphql/client.ts"), // zoekt q("zoekt searcher", "github.com/sourcegraph/zoekt/api.go"), @@ -97,7 +97,7 @@ func TestRanking(t *testing.T) { } } - ss, err := shards.NewDirectorySearcher(indexDir) + ss, err := search.NewDirectorySearcher(indexDir) if err != nil { t.Fatalf("NewDirectorySearcher(%s): %v", indexDir, err) } @@ -248,7 +248,7 @@ func indexURL(indexDir, u string) error { // languageMap[lang] = ctags.ScipCTags // } - err := archive.Index(opts, build.Options{ + err := archive.Index(opts, index.Options{ IndexDir: indexDir, CTagsMustSucceed: true, RepositoryDescription: zoekt.Repository{ @@ -371,3 +371,15 @@ func requireCTags(tb testing.TB) { tb.Skip("universal-ctags is missing") } } + +func checkScipCTags() string { + if ctags := os.Getenv("SCIP_CTAGS_COMMAND"); ctags != "" { + return ctags + } + + if ctags, err := exec.LookPath("scip-ctags"); err == nil { + return ctags + } + + return "" +} diff --git a/internal/e2e/examples/example.bin b/internal/e2e/examples/example.bin new file mode 100644 index 000000000..e9feaef23 Binary files /dev/null and b/internal/e2e/examples/example.bin differ diff --git a/build/testdata/example.cc b/internal/e2e/examples/example.cc similarity index 100% rename from build/testdata/example.cc 
rename to internal/e2e/examples/example.cc diff --git a/build/testdata/example.java b/internal/e2e/examples/example.java similarity index 100% rename from build/testdata/example.java rename to internal/e2e/examples/example.java diff --git a/build/testdata/example.kt b/internal/e2e/examples/example.kt similarity index 100% rename from build/testdata/example.kt rename to internal/e2e/examples/example.kt diff --git a/build/testdata/example.py b/internal/e2e/examples/example.py similarity index 100% rename from build/testdata/example.py rename to internal/e2e/examples/example.py diff --git a/build/testdata/example.rb b/internal/e2e/examples/example.rb similarity index 100% rename from build/testdata/example.rb rename to internal/e2e/examples/example.rb diff --git a/build/testdata/example.scala b/internal/e2e/examples/example.scala similarity index 100% rename from build/testdata/example.scala rename to internal/e2e/examples/example.scala diff --git a/build/testdata/large_file.cc b/internal/e2e/examples/large_file.cc similarity index 100% rename from build/testdata/large_file.cc rename to internal/e2e/examples/large_file.cc diff --git a/internal/e2e/examples/test_example.py b/internal/e2e/examples/test_example.py new file mode 100644 index 000000000..550d54d49 --- /dev/null +++ b/internal/e2e/examples/test_example.py @@ -0,0 +1,11 @@ +import unittest + +class TestSimpleOperations(unittest.TestCase): + def test_addition(self): + self.assertEqual(2 + 2, 4) + + def test_string_upper(self): + self.assertEqual('hello'.upper(), 'HELLO') + +if __name__ == '__main__': + unittest.main() diff --git a/build/scoring_test.go b/internal/e2e/scoring_test.go similarity index 86% rename from build/scoring_test.go rename to internal/e2e/scoring_test.go index ea0bfc638..9bdb0afb6 100644 --- a/build/scoring_test.go +++ b/internal/e2e/scoring_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package build +package e2e import ( "context" @@ -21,9 +21,10 @@ import ( "testing" "github.com/sourcegraph/zoekt" - "github.com/sourcegraph/zoekt/ctags" + "github.com/sourcegraph/zoekt/index" + "github.com/sourcegraph/zoekt/internal/ctags" "github.com/sourcegraph/zoekt/query" - "github.com/sourcegraph/zoekt/shards" + "github.com/sourcegraph/zoekt/search" ) type scoreCase struct { @@ -66,7 +67,12 @@ func TestFileNameMatch(t *testing.T) { } func TestBM25(t *testing.T) { - exampleJava, err := os.ReadFile("./testdata/example.java") + exampleJava, err := os.ReadFile("./examples/example.java") + if err != nil { + t.Fatal(err) + } + + exampleBin, err := os.ReadFile("./examples/example.bin") if err != nil { t.Fatal(err) } @@ -78,8 +84,8 @@ func TestBM25(t *testing.T) { query: &query.Substring{Pattern: "example"}, content: exampleJava, language: "Java", - // bm25-score: 0.58 <- sum-termFrequencyScore: 14.00, length-ratio: 1.00 - wantScore: 0.58, + // sum-termFrequencyScore: 14.00, length-ratio: 1.00 + wantScore: 2.02, // line 5: private final int exampleField; wantBestLineMatch: 5, }, { @@ -92,10 +98,40 @@ func TestBM25(t *testing.T) { }}, content: exampleJava, language: "Java", - // bm25-score: 1.81 <- sum-termFrequencyScore: 116.00, length-ratio: 1.00 - wantScore: 1.81, - // line 3: public class InnerClasses { - wantBestLineMatch: 3, + // sum-termFrequencyScore: 116.00, length-ratio: 1.00 + wantScore: 6.30, + // line 54: private static B runInnerInterface(InnerInterface fn, A a) { + wantBestLineMatch: 54, + }, { + // another content-only match + fileName: "example.java", + query: &query.And{Children: []query.Q{ + &query.Substring{Pattern: "system"}, + &query.Substring{Pattern: "time"}, + }}, + content: exampleJava, + language: "Java", + // sum-termFrequencies: 12, length-ratio: 1.00 + wantScore: 3.33, + // line 59: if (System.nanoTime() > System.currentTimeMillis()) { + wantBestLineMatch: 59, + }, { + // phrase boosting + fileName: "example.java", + query: 
&query.Or{Children: []query.Q{ + &query.Boost{Child: &query.Substring{Pattern: "public string apply"}, Boost: 20}, + &query.And{Children: []query.Q{ + &query.Substring{Pattern: "public"}, + &query.Substring{Pattern: "string"}, + &query.Substring{Pattern: "apply"}, + }}, + }}, + content: exampleJava, + language: "Java", + // sum-termFrequencies: sum-termFrequencies: 40, length-ratio: 1.00 + wantScore: 140.80, + // public String apply(String s) { + wantBestLineMatch: 81, }, { // Matches only on filename @@ -103,16 +139,40 @@ func TestBM25(t *testing.T) { query: &query.Substring{Pattern: "java"}, content: exampleJava, language: "Java", - // bm25-score: 0.51 <- sum-termFrequencyScore: 5.00, length-ratio: 1.00 - wantScore: 0.51, + // sum-termFrequencyScore: 5.00, length-ratio: 1.00 + wantScore: 1.77, }, { // Matches only on filename, and content is missing fileName: "a/b/c/config.go", query: &query.Substring{Pattern: "config.go"}, language: "Go", - // bm25-score: 0.60 <- sum-termFrequencyScore: 5.00, length-ratio: 0.00 - wantScore: 0.60, + // sum-termFrequencyScore: 5.00, length-ratio: 0.00 + wantScore: 2.07, + }, + { + fileName: "example.py", + query: &query.Substring{Pattern: "example"}, + language: "Python", + // sum-termFrequencyScore: 5.00, length-ratio: 0.00 + wantScore: 2.07, + }, + { + // Match on test should be downweighted + fileName: "test_example.py", + query: &query.Substring{Pattern: "example"}, + language: "Python", + // sum-termFrequencyScore: 1.00, length-ratio: 0.00 + wantScore: 1.69, + }, + { + // Match on binary should be downweighted + fileName: "example.bin", + query: &query.Substring{Pattern: "example"}, + language: "", + content: exampleBin, + // sum-termFrequencyScore: 1.00, length-ratio: 1.00 + wantScore: 1.00, }, } @@ -122,7 +182,7 @@ func TestBM25(t *testing.T) { } func TestJava(t *testing.T) { - exampleJava, err := os.ReadFile("./testdata/example.java") + exampleJava, err := os.ReadFile("./examples/example.java") if err != nil { t.Fatal(err) 
} @@ -287,7 +347,7 @@ func TestJava(t *testing.T) { } func TestKotlin(t *testing.T) { - exampleKotlin, err := os.ReadFile("./testdata/example.kt") + exampleKotlin, err := os.ReadFile("./examples/example.kt") if err != nil { t.Fatal(err) } @@ -352,7 +412,7 @@ func TestKotlin(t *testing.T) { } func TestCpp(t *testing.T) { - exampleCpp, err := os.ReadFile("./testdata/example.cc") + exampleCpp, err := os.ReadFile("./examples/example.cc") if err != nil { t.Fatal(err) } @@ -409,7 +469,7 @@ func TestCpp(t *testing.T) { } func TestPython(t *testing.T) { - examplePython, err := os.ReadFile("./testdata/example.py") + examplePython, err := os.ReadFile("./examples/example.py") if err != nil { t.Fatal(err) } @@ -453,7 +513,7 @@ func TestPython(t *testing.T) { } func TestRuby(t *testing.T) { - exampleRuby, err := os.ReadFile("./testdata/example.rb") + exampleRuby, err := os.ReadFile("./examples/example.rb") if err != nil { t.Fatal(err) } @@ -493,7 +553,7 @@ func TestRuby(t *testing.T) { } func TestScala(t *testing.T) { - exampleScala, err := os.ReadFile("./testdata/example.scala") + exampleScala, err := os.ReadFile("./examples/example.scala") if err != nil { t.Fatal(err) } @@ -628,19 +688,17 @@ func checkScoring(t *testing.T, c scoreCase, useBM25 bool, parserType ctags.CTag t.Run(name, func(t *testing.T) { dir := t.TempDir() - opts := Options{ + opts := index.Options{ IndexDir: dir, RepositoryDescription: zoekt.Repository{ Name: "repo", }, - LanguageMap: ctags.LanguageMap{ - normalizeLanguage(c.language): parserType, - }, + LanguageMap: ctags.LanguageMap{c.language: parserType}, } epsilon := 0.01 - b, err := NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } @@ -651,7 +709,7 @@ func checkScoring(t *testing.T, c scoreCase, useBM25 bool, parserType ctags.CTag t.Fatalf("Finish: %v", err) } - ss, err := shards.NewDirectorySearcher(dir) + ss, err := search.NewDirectorySearcher(dir) if err != nil { t.Fatalf("NewDirectorySearcher(%s): 
%v", dir, err) } @@ -692,16 +750,17 @@ func checkScoring(t *testing.T, c scoreCase, useBM25 bool, parserType ctags.CTag // helper to remove the tiebreaker from the score for easier comparison func withoutTiebreaker(fullScore float64, useBM25 bool) float64 { if useBM25 { + // BM25 doesn't use a tiebreaker return fullScore } - return math.Trunc(fullScore / zoekt.ScoreOffset) + return math.Trunc(fullScore / index.ScoreOffset) } func TestRepoRanks(t *testing.T) { requireCTags(t) dir := t.TempDir() - opts := Options{ + opts := index.Options{ IndexDir: dir, RepositoryDescription: zoekt.Repository{ Name: "repo", @@ -709,7 +768,7 @@ func TestRepoRanks(t *testing.T) { } searchQuery := &query.Substring{Content: true, Pattern: "Inner"} - exampleJava, err := os.ReadFile("./testdata/example.java") + exampleJava, err := os.ReadFile("./examples/example.java") if err != nil { t.Fatal(err) } @@ -745,12 +804,12 @@ func TestRepoRanks(t *testing.T) { Rank: c.repoRank, } - b, err := NewBuilder(opts) + b, err := index.NewBuilder(opts) if err != nil { t.Fatalf("NewBuilder: %v", err) } - err = b.Add(zoekt.Document{Name: "example.java", Content: exampleJava}) + err = b.Add(index.Document{Name: "example.java", Content: exampleJava}) if err != nil { t.Fatal(err) } @@ -759,7 +818,7 @@ func TestRepoRanks(t *testing.T) { t.Fatalf("Finish: %v", err) } - ss, err := shards.NewDirectorySearcher(dir) + ss, err := search.NewDirectorySearcher(dir) if err != nil { t.Fatalf("NewDirectorySearcher(%s): %v", dir, err) } diff --git a/internal/e2e/testdata/r_cody_sourcegraph_url.txt b/internal/e2e/testdata/r_cody_sourcegraph_url.txt index 66d658ac2..7985f309f 100644 --- a/internal/e2e/testdata/r_cody_sourcegraph_url.txt +++ b/internal/e2e/testdata/r_cody_sourcegraph_url.txt @@ -2,36 +2,36 @@ queryString: r:cody sourcegraph url query: (and repo:cody substr:"sourcegraph" substr:"url") targetRank: 1 -**github.com/sourcegraph/cody/lib/shared/src/sourcegraph-api/graphql/client.ts** 
+**github.com/sourcegraph/cody-public-snapshot/lib/shared/src/sourcegraph-api/graphql/client.ts** 611: const url = buildGraphQLUrl({ request: query, baseUrl: this.config.serverEndpoint }) 626: const url = buildGraphQLUrl({ request: query, baseUrl: this.dotcomUrl.href }) 641: const url = 'http://localhost:49300/.api/testLogging' hidden 51 more line matches -github.com/sourcegraph/cody/vscode/src/completions/client.ts +github.com/sourcegraph/cody-public-snapshot/vscode/src/completions/client.ts 85: const url = getCodeCompletionsEndpoint() 1:import { FeatureFlag, featureFlagProvider } from '@sourcegraph/cody-shared/src/experimentation/FeatureFlagProvider' 5:} from '@sourcegraph/cody-shared/src/sourcegraph-api/completions/client' hidden 6 more line matches -github.com/sourcegraph/cody/vscode/scripts/download-wasm-modules.ts +github.com/sourcegraph/cody-public-snapshot/vscode/scripts/download-wasm-modules.ts 83: for (const url of urls) { 93:function getFilePathFromURL(url: string): string { 20:const urls = [ hidden 21 more line matches -github.com/sourcegraph/cody/slack/src/services/local-vector-store.ts +github.com/sourcegraph/cody-public-snapshot/slack/src/services/local-vector-store.ts 18: const { content, url } = codyNotice 9: owner: 'sourcegraph', 24: fileName: url, -github.com/sourcegraph/cody/lib/shared/src/sourcegraph-api/completions/client.ts +github.com/sourcegraph/cody-public-snapshot/lib/shared/src/sourcegraph-api/completions/client.ts 23:export abstract class SourcegraphCompletionsClient { 21: * Access the chat based LLM APIs via a Sourcegraph server instance. 
36: return new URL('/.api/completions/stream', this.config.serverEndpoint).href hidden 1 more line matches -github.com/sourcegraph/cody/lib/shared/src/sourcegraph-api/completions/browserClient.ts +github.com/sourcegraph/cody-public-snapshot/lib/shared/src/sourcegraph-api/completions/browserClient.ts 8:export class SourcegraphBrowserCompletionsClient extends SourcegraphCompletionsClient { 5:import { SourcegraphCompletionsClient } from './client' 20: headersInstance.set('X-Sourcegraph-Should-Trace', 'true') diff --git a/internal/e2e/testdata/test_server.txt b/internal/e2e/testdata/test_server.txt index 8cfce010c..3e71041af 100644 --- a/internal/e2e/testdata/test_server.txt +++ b/internal/e2e/testdata/test_server.txt @@ -14,7 +14,7 @@ github.com/golang/go/src/net/rpc/server.go 197:func NewServer() *Server { hidden 104 more line matches -github.com/sourcegraph/cody/vscode/test/fixtures/mock-server.ts +github.com/sourcegraph/cody-public-snapshot/vscode/test/fixtures/mock-server.ts 126: const server = app.listen(SERVER_PORT, () => { 19:const SERVER_PORT = 49300 21:export const SERVER_URL = 'http://localhost:49300' diff --git a/internal/hybridre2/hybridre2.go b/internal/hybridre2/hybridre2.go new file mode 100644 index 000000000..8d946523c --- /dev/null +++ b/internal/hybridre2/hybridre2.go @@ -0,0 +1,148 @@ +// Copyright 2026 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Package hybridre2 provides a hybrid regex engine that switches between +// grafana/regexp (an optimized fork of Go's stdlib regexp) and +// wasilibs/go-re2 (RE2 via WebAssembly) based on input size. +// +// Motivation: Go's regexp engine lacks a lazy DFA, making it O(n·m) for +// hard patterns. RE2's lazy DFA provides linear-time matching, which is +// dramatically faster for large inputs (>32KB) or complex patterns. For +// small inputs the WASM call overhead of go-re2 exceeds the savings, +// so grafana/regexp remains the better choice there. +// +// The threshold is controlled by the ZOEKT_RE2_THRESHOLD_BYTES environment +// variable, read once at program startup: +// +// - -1 (default): disabled, always use grafana/regexp +// - 0: always use go-re2 +// - N > 0: use go-re2 when len(input) >= N bytes +// +// # Known tradeoffs +// +// Memory: each Regexp holds compiled state for both engines when RE2 is +// enabled. Patterns are compiled per-search (not cached globally), so under +// high concurrency with many unique patterns the WASM heap adds up. Monitor +// RSS when first enabling the threshold in production. +// +// UTF-8 semantics: go-re2 stops at invalid UTF-8; grafana/regexp replaces +// invalid bytes with U+FFFD and continues. Results may differ on binary +// content that slips past content-type detection. See FindAllIndex for +// details. +// +// RE2 compilation failure: if RE2 rejects a pattern that grafana/regexp +// accepts (due to syntax differences between the two engines), Compile +// returns an error rather than silently falling back to grafana/regexp. +// This is intentional (fail-fast), but it means enabling the threshold +// could surface errors for edge-case patterns that work today. Patterns +// sourced from zoekt query parsing are validated before reaching this +// package, so this is unlikely in practice. 
+package hybridre2 + +import ( + "os" + "strconv" + "sync" + + grafanaregexp "github.com/grafana/regexp" + re2regexp "github.com/wasilibs/go-re2" +) + +const ( + // envThreshold is the environment variable name controlling the size + // threshold (bytes) at which go-re2 is used instead of grafana/regexp. + // Set to -1 (default) to disable go-re2 entirely, 0 to always use it. + envThreshold = "ZOEKT_RE2_THRESHOLD_BYTES" + + // disabled is the sentinel value meaning go-re2 is never used. + disabled = int64(-1) +) + +// threshold returns the configured byte threshold, reading +// ZOEKT_RE2_THRESHOLD_BYTES from the environment exactly once. +// Negative means disabled; zero means always use RE2. +// +// Tests may reassign this variable to override the threshold. +var threshold = sync.OnceValue(func() int64 { + if val, ok := os.LookupEnv(envThreshold); ok { + if n, err := strconv.ParseInt(val, 10, 64); err == nil { + return n + } + } + return disabled +}) + +// Regexp is a compiled regular expression that dispatches to either +// grafana/regexp or go-re2 at match time, based on input size. +type Regexp struct { + grafana *grafanaregexp.Regexp + re2 *re2regexp.Regexp // nil when threshold() < 0 (disabled) +} + +// Compile returns a new Regexp. The grafana/regexp variant is always compiled. +// The go-re2 variant is only compiled when ZOEKT_RE2_THRESHOLD_BYTES is set to +// a non-negative value; when RE2 is disabled (the default), skipping WASM +// compilation keeps the disabled path truly zero-cost. +func Compile(pattern string) (*Regexp, error) { + g, err := grafanaregexp.Compile(pattern) + if err != nil { + return nil, err + } + result := &Regexp{grafana: g} + if threshold() >= 0 { + r, err := re2regexp.Compile(pattern) + if err != nil { + return nil, err + } + result.re2 = r + } + return result, nil +} + +// MustCompile is like Compile but panics on error. 
+func MustCompile(pattern string) *Regexp { + re, err := Compile(pattern) + if err != nil { + panic("hybridre2: Compile(" + pattern + "): " + err.Error()) + } + return re +} + +// useRE2 reports whether the RE2 engine should be used for an input of the +// given length, based on the current threshold setting. +func useRE2(inputLen int) bool { + t := threshold() + return t >= 0 && int64(inputLen) >= t +} + +// FindAllIndex returns successive non-overlapping matches of the expression +// in b. It uses go-re2 when len(b) >= threshold() (and RE2 is enabled), +// and grafana/regexp otherwise. Match indices are relative to b. +// +// NOTE: go-re2 stops matching at invalid UTF-8 bytes, whereas grafana/regexp +// replaces them with U+FFFD and continues. This means results may differ on +// binary or non-UTF-8 content when RE2 is active. The default threshold of -1 +// (disabled) ensures zero behaviour change for existing deployments; operators +// enabling the threshold should be aware of this distinction. +func (re *Regexp) FindAllIndex(b []byte, n int) [][]int { + if re.re2 != nil && useRE2(len(b)) { + return re.re2.FindAllIndex(b, n) + } + return re.grafana.FindAllIndex(b, n) +} + +// String returns the source text used to compile the regular expression. +func (re *Regexp) String() string { + return re.grafana.String() +} diff --git a/internal/hybridre2/hybridre2_test.go b/internal/hybridre2/hybridre2_test.go new file mode 100644 index 000000000..3c3d508c7 --- /dev/null +++ b/internal/hybridre2/hybridre2_test.go @@ -0,0 +1,358 @@ +// Copyright 2026 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package hybridre2 + +import ( + "fmt" + "testing" + + grafanaregexp "github.com/grafana/regexp" +) + +// withThreshold overrides the effective threshold for the duration of the test +// and registers a t.Cleanup to restore it afterwards. +// +// NOT safe for concurrent use: do not call t.Parallel() after withThreshold, +// and do not use it from TestMain or init(). +func withThreshold(tb testing.TB, thresh int64) { + tb.Helper() + old := threshold + threshold = func() int64 { return thresh } + tb.Cleanup(func() { threshold = old }) +} + +// ---- unit tests ---- + +func TestCompileValid(t *testing.T) { + _, err := Compile(`foo.*bar`) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestCompileInvalid(t *testing.T) { + _, err := Compile(`[invalid`) + if err == nil { + t.Fatal("expected error for invalid pattern, got nil") + } +} + +func TestMustCompilePanics(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Fatal("MustCompile should panic on invalid pattern") + } + }() + MustCompile(`[invalid`) +} + +func TestString(t *testing.T) { + const pat = `foo.*bar` + re := MustCompile(pat) + if re.String() != pat { + t.Fatalf("String() = %q, want %q", re.String(), pat) + } +} + +// TestFindAllIndexDisabled checks that with threshold=-1, we use grafana/regexp. 
+func TestFindAllIndexDisabled(t *testing.T) { + corpus := []byte("func main() { fmt.Println(\"hello world\") }") + patterns := []string{`\w+`, `fmt\.\w+`, `(?i)MAIN`, `"[^"]*"`} + + withThreshold(t, disabled) + for _, pat := range patterns { + hybrid := MustCompile(pat) + grafana := grafanaregexp.MustCompile(pat) + got := hybrid.FindAllIndex(corpus, -1) + want := grafana.FindAllIndex(corpus, -1) + if !equalIndexSlices(got, want) { + t.Errorf("disabled mode, pattern %q: hybrid=%v grafana=%v", pat, got, want) + } + } +} + +// TestFindAllIndexForcedRE2 checks that with threshold=0, go-re2 is used and +// produces identical results to grafana/regexp for standard patterns. +func TestFindAllIndexForcedRE2(t *testing.T) { + corpus := []byte("func main() { fmt.Println(\"hello world\") }") + patterns := []string{`\w+`, `fmt\.\w+`, `(?i)MAIN`, `"[^"]*"`} + + withThreshold(t, 0) + for _, pat := range patterns { + hybrid := MustCompile(pat) + grafana := grafanaregexp.MustCompile(pat) + got := hybrid.FindAllIndex(corpus, -1) + want := grafana.FindAllIndex(corpus, -1) + if !equalIndexSlices(got, want) { + t.Errorf("forced-re2 mode, pattern %q: hybrid=%v grafana=%v", pat, got, want) + } + } +} + +// TestThresholdSwitching verifies the engine switches at the configured byte boundary. 
+func TestThresholdSwitching(t *testing.T) { + const thresh = int64(512) + pattern := `func\s+\w+` + grafana := grafanaregexp.MustCompile(pattern) + + smallCorpus := makeCorpus(300) // < 512 + largeCorpus := makeCorpus(600) // >= 512 + + withThreshold(t, thresh) + hybrid := MustCompile(pattern) + + for _, tc := range []struct { + name string + corpus []byte + }{ + {"small(<threshold)", smallCorpus}, + {"large(>=threshold)", largeCorpus}, + } { + got := hybrid.FindAllIndex(tc.corpus, -1) + want := grafana.FindAllIndex(tc.corpus, -1) + if !equalIndexSlices(got, want) { + t.Errorf("%s: hybrid=%v grafana=%v", tc.name, got, want) + } + } +} + +// TestFindAllIndexIdenticalResults is a comprehensive correctness sweep across +// pattern types and input sizes, asserting identical match positions. +func TestFindAllIndexIdenticalResults(t *testing.T) { + patterns := []struct { + name string + pattern string + }{ + {"literal", `hello`}, + {"case-insensitive", `(?i)Hello`}, + {"word-boundary", `\bfunc\b`}, + {"alternation", `foo|bar|baz`}, + {"char-class", `[a-zA-Z_]\w*`}, + {"complex", `(func|var|const)\s+[A-Z]\w*`}, + {"dot-plus", `.+`}, + {"anchored-line", `(?m)^package\s+\w+`}, + {"no-match", `XYZZY_NEVER_MATCHES`}, + } + + sizes := []struct { + name string + size int + }{ + {"64B", 64}, + {"512B", 512}, + {"4KB", 4 * 1024}, + {"64KB", 64 * 1024}, + {"256KB", 256 * 1024}, + } + + // Force re2 path to test its correctness across all sizes.
+ withThreshold(t, 0) + for _, sz := range sizes { + corpus := makeCorpus(sz.size) + for _, pat := range patterns { + name := sz.name + "/" + pat.name + t.Run(name, func(t *testing.T) { + hybrid := MustCompile(pat.pattern) + grafana := grafanaregexp.MustCompile(pat.pattern) + + got := hybrid.FindAllIndex(corpus, -1) + want := grafana.FindAllIndex(corpus, -1) + if !equalIndexSlices(got, want) { + t.Errorf("pattern=%q size=%d: len(hybrid)=%d len(grafana)=%d", + pat.pattern, sz.size, len(got), len(want)) + if len(got) > 0 && len(want) > 0 { + t.Errorf(" first hybrid=%v first grafana=%v", got[0], want[0]) + } + } + }) + } + } +} + +// TestFindAllIndexLimitN verifies that the n parameter (match count limit) is +// honoured identically by both engines. +func TestFindAllIndexLimitN(t *testing.T) { + corpus := makeCorpus(64 * 1024) // large enough to have many matches + patterns := []string{`func\s+\w+`, `\bvar\b`, `[A-Z]\w*`} + + withThreshold(t, 0) // force re2 path + for _, pat := range patterns { + hybrid := MustCompile(pat) + grafana := grafanaregexp.MustCompile(pat) + + got := hybrid.FindAllIndex(corpus, 1) + want := grafana.FindAllIndex(corpus, 1) + if !equalIndexSlices(got, want) { + t.Errorf("n=1, pattern=%q: hybrid=%v grafana=%v", pat, got, want) + } + // Sanity: n=1 should return at most one match. + if len(got) > 1 { + t.Errorf("n=1, pattern=%q: got %d matches, want <= 1", pat, len(got)) + } + } +} + +// TestNoMatchReturnsEmpty verifies no-match returns nil/empty consistently. +func TestNoMatchReturnsEmpty(t *testing.T) { + corpus := makeCorpus(1024) + + for _, thresh := range []int64{disabled, 0} { + t.Run(fmt.Sprintf("thresh=%d", thresh), func(t *testing.T) { + withThreshold(t, thresh) + // MustCompile must be after withThreshold so that the lazy RE2 + // compilation in Compile() sees the overridden threshold and + // actually initialises re.re2 when thresh=0. 
+ re := MustCompile(`XYZZY_NEVER_MATCHES`) + if got := re.FindAllIndex(corpus, -1); len(got) != 0 { + t.Errorf("thresh=%d: expected empty, got %v", thresh, got) + } + }) + } +} + +// ---- benchmarks ---- + +// BenchmarkEngines measures FindAllIndex performance for grafana/regexp vs +// go-re2 across multiple input sizes and pattern complexities. +// +// Run with: +// +// go test -bench=BenchmarkEngines -benchmem -benchtime=3s ./internal/hybridre2/ +func BenchmarkEngines(b *testing.B) { + patterns := []struct { + name string + pattern string + }{ + {"literal", `main`}, + {"case-insensitive", `(?i)func`}, + {"alternation-5", `func|var|const|type|import`}, + {"complex", `(func|var)\s+[A-Z]\w*\s*\(`}, + {"hard-no-match", `XYZZY_NEVER_MATCHES_AT_ALL`}, + } + + sizes := []struct { + name string + size int + }{ + {"512B", 512}, + {"4KB", 4 * 1024}, + {"32KB", 32 * 1024}, + {"128KB", 128 * 1024}, + {"512KB", 512 * 1024}, + } + + // Pre-build all corpora outside the benchmark loop. + corpora := make(map[string][]byte, len(sizes)) + for _, sz := range sizes { + corpora[sz.name] = makeCorpus(sz.size) + } + + for _, pat := range patterns { + grafanaRe := grafanaregexp.MustCompile(pat.pattern) + + for _, sz := range sizes { + corpus := corpora[sz.name] + name := pat.name + "/" + sz.name + + b.Run("grafana/"+name, func(b *testing.B) { + b.SetBytes(int64(len(corpus))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = grafanaRe.FindAllIndex(corpus, -1) + } + }) + + b.Run("go-re2/"+name, func(b *testing.B) { + withThreshold(b, 0) // force re2 for all sizes + // MustCompile must be after withThreshold so that re.re2 + // is initialised (lazy compilation checks threshold() at + // compile time, not match time). 
+ hybridRe := MustCompile(pat.pattern) + b.SetBytes(int64(len(corpus))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = hybridRe.FindAllIndex(corpus, -1) + } + }) + } + } +} + +// ---- helpers ---- + +// makeCorpus returns a realistic-looking Go source corpus of approximately +// the requested size. +func makeCorpus(size int) []byte { + const template = `package main + +import ( + "fmt" + "strings" +) + +// Foo is an exported function that transforms its input. +func Foo(input string) string { + return strings.ToUpper(input) +} + +// Bar demonstrates calling Foo. +func Bar() { + result := Foo("hello world") + fmt.Println(result) +} + +var globalVar = "some value" +const MaxItems = 100 + +type MyStruct struct { + Name string + Value int +} + +func (m MyStruct) String() string { + return fmt.Sprintf("%s=%d", m.Name, m.Value) +} + +` + buf := make([]byte, 0, size) + for len(buf) < size { + buf = append(buf, []byte(template)...) + } + return buf[:size] +} + +func equalIndexSlices(a, b [][]int) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if !equalIntSlice(a[i], b[i]) { + return false + } + } + return true +} + +func equalIntSlice(a, b []int) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} diff --git a/json/json.go b/internal/json/json.go similarity index 100% rename from json/json.go rename to internal/json/json.go diff --git a/json/json_test.go b/internal/json/json_test.go similarity index 98% rename from json/json_test.go rename to internal/json/json_test.go index 4f52fc882..d63e80e58 100644 --- a/json/json_test.go +++ b/internal/json/json_test.go @@ -11,8 +11,8 @@ import ( "testing" "github.com/sourcegraph/zoekt" + zjson "github.com/sourcegraph/zoekt/internal/json" "github.com/sourcegraph/zoekt/internal/mockSearcher" - zjson "github.com/sourcegraph/zoekt/json" "github.com/sourcegraph/zoekt/query" ) diff --git a/internal/languages/language.go 
b/internal/languages/language.go deleted file mode 100644 index ec76d9458..000000000 --- a/internal/languages/language.go +++ /dev/null @@ -1,74 +0,0 @@ -// This file wraps the logic of go-enry (https://github.com/go-enry/go-enry) to support additional languages. -// go-enry is based off of a package called Linguist (https://github.com/github/linguist) -// and sometimes programming languages may not be supported by Linguist -// or may take a while to get merged in and make it into go-enry. This wrapper -// gives us flexibility to support languages in those cases. We list additional languages -// in this file and remove them once they make it into Linguist and go-enry. -// This logic is similar to what we have in the sourcegraph/sourcegraph repo, in the future -// we plan to refactor both into a common library to share between the two repos. -package languages - -import ( - "path/filepath" - "strings" - - "github.com/go-enry/go-enry/v2" -) - -var unsupportedByLinguistAliasMap = map[string]string{ - // Extensions for the Apex programming language - // See https://developer.salesforce.com/docs/atlas.en-us.apexcode.meta/apexcode/apex_dev_guide.htm - "apex": "Apex", - // Pkl Configuration Language (https://pkl-lang.org/) - // Add to linguist on 6/7/24 - // can remove once go-enry package updates - // to that linguist version - "pkl": "Pkl", - // Magik Language - "magik": "Magik", -} - -var unsupportedByLinguistExtensionToNameMap = map[string]string{ - ".apex": "Apex", - ".apxt": "Apex", - ".apxc": "Apex", - ".cls": "Apex", - ".trigger": "Apex", - // Pkl Configuration Language (https://pkl-lang.org/) - ".pkl": "Pkl", - // Magik Language - ".magik": "Magik", -} - -// getLanguagesByAlias is a replacement for enry.GetLanguagesByAlias -// It supports languages that are missing in linguist -func GetLanguageByAlias(alias string) (language string, ok bool) { - language, ok = enry.GetLanguageByAlias(alias) - if !ok { - normalizedAlias := strings.ToLower(alias) - language, ok = 
unsupportedByLinguistAliasMap[normalizedAlias] - } - - return -} - -// GetLanguage is a replacement for enry.GetLanguage -// to find out the most probable language to return but includes support -// for languages missing from linguist -func GetLanguage(filename string, content []byte) (language string) { - language = enry.GetLanguage(filename, content) - - // If go-enry failed to find language, fall back on our - // internal check for languages missing in linguist - if language == "" { - ext := filepath.Ext(filename) - normalizedExt := strings.ToLower(ext) - if ext == "" { - return - } - if lang, ok := unsupportedByLinguistExtensionToNameMap[normalizedExt]; ok { - language = lang - } - } - return -} diff --git a/internal/languages/language_test.go b/internal/languages/language_test.go deleted file mode 100644 index 25e2382a0..000000000 --- a/internal/languages/language_test.go +++ /dev/null @@ -1,107 +0,0 @@ -package languages - -import "testing" - -func TestGetLanguageByAlias(t *testing.T) { - tests := []struct { - name string - alias string - want string - wantOk bool - }{ - { - name: "empty alias", - alias: "", - want: "", - wantOk: false, - }, - { - name: "unknown alias", - alias: "unknown", - want: "", - wantOk: false, - }, - { - name: "supported alias", - alias: "go", - want: "Go", - wantOk: true, - }, - { - name: "unsupported by linguist alias", - alias: "magik", - want: "Magik", - wantOk: true, - }, - { - name: "unsupported by linguist alias normalized", - alias: "mAgIk", - want: "Magik", - wantOk: true, - }, - { - name: "apex example unsupported by linguist alias", - alias: "apex", - want: "Apex", - wantOk: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, ok := GetLanguageByAlias(tt.alias) - if got != tt.want || ok != tt.wantOk { - t.Errorf("GetLanguageByAlias(%q) = %q, %t, want %q, %t", tt.alias, got, ok, tt.want, tt.wantOk) - } - }) - } -} - -func TestGetLanguage(t *testing.T) { - tests := []struct { - name 
string - filename string - content []byte - want string - }{ - { - name: "empty filename", - filename: "", - content: []byte(""), - want: "", - }, - { - name: "unknown extension", - filename: "file.unknown", - content: []byte(""), - want: "", - }, - { - name: "supported extension", - filename: "file.go", - content: []byte("package main"), - want: "Go", - }, - { - name: "magik: unsupported by linguist extension", - filename: "file.magik", - content: []byte(""), - want: "Magik", - }, - { - name: "apex: unsupported by linguist extension", - filename: "file.apxc", - content: []byte(""), - want: "Apex", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := GetLanguage(tt.filename, tt.content) - if got != tt.want { - t.Errorf("GetLanguage(%q, %q) = %q, want %q", tt.filename, tt.content, got, tt.want) - } - }) - } -} diff --git a/internal/profiler/profiler.go b/internal/profiler/profiler.go index 84039ac92..916608c40 100644 --- a/internal/profiler/profiler.go +++ b/internal/profiler/profiler.go @@ -5,7 +5,8 @@ import ( "os" "cloud.google.com/go/profiler" - "github.com/sourcegraph/zoekt" + + "github.com/sourcegraph/zoekt/index" ) // Init starts the supported profilers IFF the environment variable is set. 
@@ -13,7 +14,7 @@ func Init(svcName string) { if os.Getenv("GOOGLE_CLOUD_PROFILER_ENABLED") != "" { err := profiler.Start(profiler.Config{ Service: svcName, - ServiceVersion: zoekt.Version, + ServiceVersion: index.Version, MutexProfiling: true, AllocForceGC: true, }) diff --git a/internal/tenant/context.go b/internal/tenant/context.go index 5f79f9270..9ddace626 100644 --- a/internal/tenant/context.go +++ b/internal/tenant/context.go @@ -11,7 +11,7 @@ import ( "github.com/sourcegraph/zoekt/internal/tenant/internal/enforcement" "github.com/sourcegraph/zoekt/internal/tenant/internal/tenanttype" "github.com/sourcegraph/zoekt/internal/tenant/systemtenant" - "github.com/sourcegraph/zoekt/trace" + "github.com/sourcegraph/zoekt/internal/trace" ) var ErrMissingTenant = fmt.Errorf("missing tenant") @@ -27,6 +27,10 @@ func FromContext(ctx context.Context) (*tenanttype.Tenant, error) { // Log logs the tenant ID to the trace. If tenant logging is enabled, it also // logs a stack trace to a pprof profile. func Log(ctx context.Context, tr *trace.Trace) { + if !enforceTenant() { + return + } + if systemtenant.Is(ctx) { tr.LazyPrintf("tenant: system") return diff --git a/internal/tenant/enforcement.go b/internal/tenant/enforcement.go index a5cab97af..3e65b7767 100644 --- a/internal/tenant/enforcement.go +++ b/internal/tenant/enforcement.go @@ -1,10 +1,12 @@ package tenant import ( + "os" + "github.com/sourcegraph/zoekt/internal/tenant/internal/enforcement" ) -func EnforceTenant() bool { +func enforceTenant() bool { switch enforcement.EnforcementMode.Load() { case "strict": return true @@ -12,3 +14,23 @@ func EnforceTenant() bool { return false } } + +// UseIDBasedShardNames returns true if the on disk layout of shards should +// instead use tenant ID and repository IDs in the names instead of the actual +// repository names. +// +// It is possible for repositories to have the same name, but have different +// content in a multi-tenant setup. 
As such, this implementation only returns +// true in those situations. +// +// Note: We could migrate all on-disk layout to only be ID based. However, +// ID's are a Sourcegraph specific feature so we will always need the two code +// paths. As such we only return true in multitenant setups. +// +// This is Sourcegraph specific. +func UseIDBasedShardNames() bool { + // We use the presence of this environment variable to tell if we are in a + // multi-tenant setup. This is the same check that is done in the + // Sourcegraph monorepo. + return os.Getenv("WORKSPACES_API_URL") != "" +} diff --git a/internal/tenant/query.go b/internal/tenant/query.go index 3f56cb721..bc8a38682 100644 --- a/internal/tenant/query.go +++ b/internal/tenant/query.go @@ -9,7 +9,7 @@ import ( // HasAccess returns true if the tenant ID in the context matches the // given ID. If tenant enforcement is disabled, it always returns true. func HasAccess(ctx context.Context, id int) bool { - if !EnforceTenant() { + if !enforceTenant() { return true } if systemtenant.Is(ctx) { diff --git a/internal/tenant/shards.go b/internal/tenant/shards.go deleted file mode 100644 index af4cbb103..000000000 --- a/internal/tenant/shards.go +++ /dev/null @@ -1,9 +0,0 @@ -package tenant - -import "fmt" - -// SrcPrefix returns the Sourcegraph prefix of a shard. We put it here to avoid -// circular dependencies. 
-func SrcPrefix(tenantID int, repoID uint32) string { - return fmt.Sprintf("%09d_%09d", tenantID, repoID) -} diff --git a/trace/middleware.go b/internal/trace/middleware.go similarity index 100% rename from trace/middleware.go rename to internal/trace/middleware.go diff --git a/trace/opentracing.go b/internal/trace/opentracing.go similarity index 55% rename from trace/opentracing.go rename to internal/trace/opentracing.go index 0589bb73b..640dd592d 100644 --- a/trace/opentracing.go +++ b/internal/trace/opentracing.go @@ -36,14 +36,3 @@ func GetOpenTracer(ctx context.Context, tracer opentracing.Tracer) opentracing.T } return tracer } - -// StartSpanFromContext starts a span using the tracer returned by invoking GetOpenTracer with the -// passed-in tracer. -func StartSpanFromContextWithTracer(ctx context.Context, tracer opentracing.Tracer, operationName string, opts ...opentracing.StartSpanOption) (opentracing.Span, context.Context) { - return opentracing.StartSpanFromContextWithTracer(ctx, GetOpenTracer(ctx, tracer), operationName, opts...) -} - -// StartSpanFromContext starts a span using the tracer returned by GetOpenTracer. -func StartSpanFromContext(ctx context.Context, operationName string, opts ...opentracing.StartSpanOption) (opentracing.Span, context.Context) { - return StartSpanFromContextWithTracer(ctx, GetOpenTracer(ctx, nil), operationName, opts...) -} diff --git a/trace/trace.go b/internal/trace/trace.go similarity index 87% rename from trace/trace.go rename to internal/trace/trace.go index b98955640..a47ab1082 100644 --- a/trace/trace.go +++ b/internal/trace/trace.go @@ -17,26 +17,14 @@ import ( nettrace "golang.org/x/net/trace" ) -// A Tracer for trace creation, parameterised over an -// opentracing.Tracer. Use this if you don't want to use -// the global tracer. 
-type Tracer struct { - Tracer opentracing.Tracer -} - func New(ctx context.Context, family, title string) (*Trace, context.Context) { - tr := Tracer{Tracer: GetOpenTracer(ctx, nil)} - return tr.New(ctx, family, title) -} - -// New returns a new Trace with the specified family and title. -func (t Tracer) New(ctx context.Context, family, title string) (*Trace, context.Context) { - span, ctx := StartSpanFromContextWithTracer( - ctx, - t.Tracer, + tracer := GetOpenTracer(ctx, nil) + span, ctx := opentracing.StartSpanFromContextWithTracer(ctx, + tracer, family, opentracing.Tag{Key: "title", Value: title}, ) + tr := nettrace.New(family, title) trace := &Trace{span: span, trace: tr, family: family} if parent := TraceFromContext(ctx); parent != nil { @@ -57,7 +45,7 @@ type Trace struct { // LazyPrintf evaluates its arguments with fmt.Sprintf each time the // /debug/requests page is rendered. Any memory referenced by a will be // pinned until the trace is finished and later discarded. -func (t *Trace) LazyPrintf(format string, a ...interface{}) { +func (t *Trace) LazyPrintf(format string, a ...any) { t.span.LogFields(Printf("log", format, a...)) t.trace.LazyPrintf(format, a...) } @@ -94,7 +82,7 @@ func (t *Trace) Finish() { // Printf is an opentracing log.Field which is a LazyLogger. So the format // string will only be evaluated if the trace is collected. In the case of // net/trace, it will only be evaluated on page load. 
-func Printf(key, f string, args ...interface{}) log.Field { +func Printf(key, f string, args ...any) log.Field { return log.Lazy(func(fv log.Encoder) { fv.EmitString(key, fmt.Sprintf(f, args...)) }) @@ -184,7 +172,7 @@ func (e *encoder) EmitFloat64(key string, value float64) { e.EmitString(key, strconv.FormatFloat(value, 'E', -1, 64)) } -func (e *encoder) EmitObject(key string, value interface{}) { +func (e *encoder) EmitObject(key string, value any) { e.EmitString(key, fmt.Sprintf("%+v", value)) } diff --git a/internal/tracer/jaeger.go b/internal/tracer/jaeger.go index c950b6d79..0c46d7ea5 100644 --- a/internal/tracer/jaeger.go +++ b/internal/tracer/jaeger.go @@ -46,6 +46,6 @@ func (l *jaegerLogger) Error(msg string) { } // Infof logs a message at info priority -func (l *jaegerLogger) Infof(msg string, args ...interface{}) { +func (l *jaegerLogger) Infof(msg string, args ...any) { log.Printf(msg, args...) } diff --git a/internal/tracer/opentelemetry.go b/internal/tracer/opentelemetry.go index 9fffc00e2..c438b2c6c 100644 --- a/internal/tracer/opentelemetry.go +++ b/internal/tracer/opentelemetry.go @@ -57,7 +57,7 @@ func configureOpenTelemetry(resource sglog.Resource) (opentracing.Tracer, error) semconv.ServiceNameKey.String(resource.Name), semconv.ServiceInstanceIDKey.String(resource.InstanceID), semconv.ServiceVersionKey.String(resource.Version))), - oteltracesdk.WithSampler(oteltracesdk.AlwaysSample()), + oteltracesdk.WithSampler(oteltracesdk.ParentBased(oteltracesdk.NeverSample())), oteltracesdk.WithSpanProcessor(processor), ) diff --git a/languages/enry_vendored.go b/languages/enry_vendored.go new file mode 100644 index 000000000..8bc442341 --- /dev/null +++ b/languages/enry_vendored.go @@ -0,0 +1,15 @@ +package languages + +import "strings" + +// This file contains private functions +// vendored from the go-enry codebase. + +// convertToAliasKey is vendored from go-enry to make sure +// we're normalizing strings the same way. 
+func convertToAliasKey(langName string) string { + ak := strings.SplitN(langName, `,`, 2)[0] + ak = strings.Replace(ak, ` `, `_`, -1) + ak = strings.ToLower(ak) + return ak +} diff --git a/languages/extensions.go b/languages/extensions.go new file mode 100644 index 000000000..d62716fa3 --- /dev/null +++ b/languages/extensions.go @@ -0,0 +1,489 @@ +package languages + +import ( + "path/filepath" + "slices" + "strings" + + "github.com/go-enry/go-enry/v2" + enrydata "github.com/go-enry/go-enry/v2/data" +) + +// GetLanguageByNameOrAlias returns the standardized name for +// a language based on its name (in which case this is an identity operation) +// or based on its alias, which is potentially an alternate name for +// the language. +// +// Aliases are fully lowercase, and map N-1 to languages. +// +// For example, +// +// GetLanguageByNameOrAlias("ada") == "Ada", true +// GetLanguageByNameOrAlias("ada95") == "Ada", true +// +// Historical note: This function was added for replacing usages of +// enry.GetLanguageByAlias, which, unlike the name suggests, also +// handles non-normalized names such as those with spaces. +func GetLanguageByNameOrAlias(nameOrAlias string) (lang string, ok bool) { + alias := convertToAliasKey(nameOrAlias) + if lang, ok = unsupportedByEnryAliasMap[alias]; ok { + return lang, true + } + + return enry.GetLanguageByAlias(alias) +} + +// GetLanguageExtensions returns the list of file extensions for a given +// language. Returned extensions are always prefixed with a '.'. +// +// The returned slice will be empty iff the language is not known. +// +// Handles more languages than enry.GetLanguageExtensions. +// +// Mutually consistent with getLanguagesByExtension, see the tests +// for the exact invariants. 
+func GetLanguageExtensions(language string) []string { + if langs, ok := unsupportedByEnryNameToExtensionMap[language]; ok { + return langs + } + + ignoreExts, isNiche := nicheExtensionUsages[language] + // Force a copy to avoid accidentally modifying the global variable + exts := slices.Clone(enry.GetLanguageExtensions(language)) + for ext, lang := range sgExtraLangsForExts { // Map is tiny, so linear lookup is fine + if language == lang { + exts = append(exts, ext) + } + } + if !isNiche { + return exts + } + return slices.DeleteFunc(exts, func(ext string) bool { + _, shouldIgnore := ignoreExts[ext] + return shouldIgnore + }) +} + +// getLanguagesByExtension is a replacement for enry.GetLanguagesByExtension +// to work around the following limitations: +// - For some extensions which are overwhelmingly used by a certain file type +// in practice, such as '.ts', '.md' and '.yaml', it returns ambiguous results. +// - It does not provide any information about binary files. +// - Some languages are not supported by enry yet (e.g. Magik) +func getLanguagesByExtension(path string) (candidates []string, isLikelyBinaryFile bool) { + // Lowercase extension before lookups to match enry's behavior. 
+ ext := strings.ToLower(filepath.Ext(path)) + if ext == "" { + return nil, false + } + if lang, ok := unsupportedByEnryExtensionToNameMap[ext]; ok { + return []string{lang}, false + } + if _, ok := commonBinaryFileExtensions[ext[1:]]; ok { + return nil, true + } + if lang, ok := overrideAmbiguousExtensionsMap[ext]; ok { + return []string{lang}, false + } + candidates = enry.GetLanguagesByExtension(path, nil, nil) + if extra, ok := sgExtraLangsForExts[ext]; ok { + candidates = append(candidates, extra) + } + return candidates, false +} + +var commonBinaryFileExtensions = func() map[string]struct{} { + m := map[string]struct{}{} + for _, s := range commonBinaryFileExtensionsList { + m[s] = struct{}{} + } + return m +}() + +var sgExtraLangsForExts = map[string]string{ + ".c": "C++", + // NOTE: Downstream code does linear lookups on this map, so + // be careful if you're adding lots of entries here. +} + +var sgExtraContentHeuristics = map[string]*enrydata.Heuristics{ + ".c": enrydata.ContentHeuristics[".h"], +} + +// overrideAmbiguousExtensionsMap represents extensions which are ambiguous according to +// enry but not for Sourcegraph. +var overrideAmbiguousExtensionsMap = map[string]string{ + // Ignoring the uncommon usage of '.cs' for Smalltalk. + ".cs": "C#", + // The other languages are Filterscript, Forth, GLSL. Out of that, + // Forth and GLSL commonly use other extensions. Ignore Filterscript + // as it is niche. + ".fs": "F#", + // Ignoring the uncommon usage of '.html' for Ecmarkup. + ".html": "HTML", + // Ignoring other variants of JSON, such as OASv2-json and OASv3-json + ".json": "JSON", + // Not considering "GCC Machine Description". + ".md": "Markdown", + // The other main language using '.rs' is RenderScript, but that's deprecated. 
+ // See https://developer.android.com/guide/topics/renderscript/compute + ".rs": "Rust", + // In i18n contexts, there are XML files with '.ts' and '.tsx' extensions, + // but we ignore those for now to avoid penalizing the common case. + ".tsx": "TSX", + ".ts": "TypeScript", + // Ignoring "Adblock Filter List" and "Vim Help File". + ".txt": "Text", + // Ignoring other variants of YAML, such as MiniYAML, OASv2-yaml, OASv3-yaml. + ".yaml": "YAML", + ".yml": "YAML", + // The PR adding Pkl support also listed another language called Pickle in + // its heuristics, but doesn't have any real support for it. Just ignore + // it. + // https://github.com/github-linguist/linguist/pull/6730/files#diff-c2d2d7946540ab501a5ef7a7f54a57c530d8da599e41c2beb0fd2f5635d2fd50R539 + ".pkl": "Pkl", +} + +// unsupportedByEnryExtensionToNameMap contains extension->name mappings +// for languages not tracked by go-enry. +var unsupportedByEnryExtensionToNameMap = map[string]string{ + // Extensions for the Apex programming language + // See https://developer.salesforce.com/docs/atlas.en-us.apexcode.meta/apexcode/apex_dev_guide.htm + ".apex": "Apex", + ".apxt": "Apex", + ".apxc": "Apex", + ".trigger": "Apex", + ".magik": "Magik", +} + +// nicheExtensionUsage keeps track of which (lang, extension) mappings +// should not be considered. +// +// We cannot wholesale ignore these languages, as this list includes +// languages like XML, but it can contain unusual extensions like '.tsx' +// which we generally want to classify as TypeScript. 
+var nicheExtensionUsages = func() map[string]map[string]struct{} { + niche := map[string]map[string]struct{}{} + considered := map[string]struct{}{} + for _, lang := range overrideAmbiguousExtensionsMap { + considered[lang] = struct{}{} + } + for ext := range overrideAmbiguousExtensionsMap { + langs := enry.GetLanguagesByExtension("foo"+ext, nil, nil) + for _, lang := range langs { + if _, found := considered[lang]; !found { + if m, hasMap := niche[lang]; hasMap { + m[ext] = struct{}{} + } else { + niche[lang] = map[string]struct{}{ext: {}} + } + } + } + } + for specialOverrideExt, lang := range unsupportedByEnryExtensionToNameMap { + considered[lang] = struct{}{} + langs := enry.GetLanguagesByExtension("foo"+specialOverrideExt, nil, nil) + for _, lang := range langs { + if _, found := considered[lang]; !found { + if m, hasMap := niche[lang]; hasMap { + m[specialOverrideExt] = struct{}{} + } else { + niche[lang] = map[string]struct{}{specialOverrideExt: {}} + } + } + } + } + return niche +}() + +// unsupportedByEnryNameToExtensionMap contains language->extension mappings +// for languages not tracked by go-enry. +var unsupportedByEnryNameToExtensionMap = reverseMap(unsupportedByEnryExtensionToNameMap) + +// unsupportedByEnryAliasMap maps alias -> language name for languages +// not tracked by go-enry. 
+var unsupportedByEnryAliasMap = func() map[string]string { + out := map[string]string{} + for _, lang := range unsupportedByEnryExtensionToNameMap { + out[convertToAliasKey(lang)] = lang + } + return out +}() + +func reverseMap(m map[string]string) map[string][]string { + n := make(map[string][]string, len(m)) + for k, v := range m { + n[v] = append(n[v], k) + } + return n +} + +// Source: https://github.com/sindresorhus/binary-extensions/blob/main/binary-extensions.json +// License: https://github.com/sindresorhus/binary-extensions/blob/main/license +// Replace the contents with +// curl -L https://raw.githubusercontent.com/sindresorhus/binary-extensions/main/binary-extensions.json | jq '.[]' | awk '{print $1 ","}' +// +// Not adding a leading '.' here to make it easier to update/compare the list. +var commonBinaryFileExtensionsList = []string{ + "3dm", + "3ds", + "3g2", + "3gp", + "7z", + "a", + "aac", + "adp", + "afdesign", + "afphoto", + "afpub", + "ai", + "aif", + "aiff", + "alz", + "ape", + "apk", + "appimage", + "ar", + "arj", + "asf", + "au", + "avi", + "bak", + "baml", + "bh", + "bin", + "bk", + "bmp", + "btif", + "bz2", + "bzip2", + "cab", + "caf", + "cgm", + "class", + "cmx", + "cpio", + "cr2", + "cur", + "dat", + "dcm", + "deb", + "dex", + "djvu", + "dll", + "dmg", + "dng", + "doc", + "docm", + "docx", + "dot", + "dotm", + "dra", + "DS_Store", + "dsk", + "dts", + "dtshd", + "dvb", + "dwg", + "dxf", + "ecelp4800", + "ecelp7470", + "ecelp9600", + "egg", + "eol", + "eot", + "epub", + "exe", + "f4v", + "fbs", + "fh", + "fla", + "flac", + "flatpak", + "fli", + "flv", + "fpx", + "fst", + "fvt", + "g3", + "gh", + "gif", + "graffle", + "gz", + "gzip", + "h261", + "h263", + "h264", + "icns", + "ico", + "ief", + "img", + "ipa", + "iso", + "jar", + "jpeg", + "jpg", + "jpgv", + "jpm", + "jxr", + "key", + "ktx", + "lha", + "lib", + "lvp", + "lz", + "lzh", + "lzma", + "lzo", + "m3u", + "m4a", + "m4v", + "mar", + "mdi", + "mht", + "mid", + "midi", + "mj2", + "mka", + 
"mkv", + "mmr", + "mng", + "mobi", + "mov", + "movie", + "mp3", + "mp4", + "mp4a", + "mpeg", + "mpg", + "mpga", + "mxu", + "nef", + "npx", + "numbers", + "nupkg", + "o", + "odp", + "ods", + "odt", + "oga", + "ogg", + "ogv", + "otf", + "ott", + "pages", + "pbm", + "pcx", + "pdb", + "pdf", + "pea", + "pgm", + "pic", + "png", + "pnm", + "pot", + "potm", + "potx", + "ppa", + "ppam", + "ppm", + "pps", + "ppsm", + "ppsx", + "ppt", + "pptm", + "pptx", + "psd", + "pya", + "pyc", + "pyo", + "pyv", + "qt", + "rar", + "ras", + "raw", + "resources", + "rgb", + "rip", + "rlc", + "rmf", + "rmvb", + "rpm", + "rtf", + "rz", + "s3m", + "s7z", + "scpt", + "sgi", + "shar", + "snap", + "sil", + "sketch", + "slk", + "smv", + "snk", + "so", + "stl", + "suo", + "sub", + "swf", + "tar", + "tbz", + "tbz2", + "tga", + "tgz", + "thmx", + "tif", + "tiff", + "tlz", + "ttc", + "ttf", + "txz", + "udf", + "uvh", + "uvi", + "uvm", + "uvp", + "uvs", + "uvu", + "viv", + "vob", + "war", + "wav", + "wax", + "wbmp", + "wdp", + "weba", + "webm", + "webp", + "whl", + "wim", + "wm", + "wma", + "wmv", + "wmx", + "woff", + "woff2", + "wrm", + "wvx", + "xbm", + "xif", + "xla", + "xlam", + "xls", + "xlsb", + "xlsm", + "xlsx", + "xlt", + "xltm", + "xltx", + "xm", + "xmind", + "xpi", + "xpm", + "xwd", + "xz", + "z", + "zip", + "zipx", +} diff --git a/languages/extensions_test.go b/languages/extensions_test.go new file mode 100644 index 000000000..57dc814d2 --- /dev/null +++ b/languages/extensions_test.go @@ -0,0 +1,205 @@ +package languages + +import ( + "slices" + "strings" + "testing" + + "github.com/go-enry/go-enry/v2" + enrydata "github.com/go-enry/go-enry/v2/data" + "github.com/stretchr/testify/require" +) + +// Languages/extensions that we don't want to regress +var nonAmbiguousExtensionsCheck = map[string]string{ + ".apex": "Apex", + ".apxt": "Apex", + ".apxc": "Apex", + ".trigger": "Apex", + ".js": "JavaScript", + // Linguist removed JSX (but not TSX) as a separate language: + // 
https://github.com/github-linguist/linguist/pull/5133 + ".jsx": "JavaScript", + ".ts": "TypeScript", + ".tsx": "TSX", + ".py": "Python", + ".rb": "Ruby", + ".go": "Go", + ".java": "Java", + ".kt": "Kotlin", + ".magik": "Magik", + ".scala": "Scala", + ".cs": "C#", + ".fs": "F#", + ".rs": "Rust", + // ".c" is not included as we consider it ambiguous (C or C++) (SPLF-1309) + ".cpp": "C++", + ".cxx": "C++", + ".html": "HTML", + ".hpp": "C++", + ".hxx": "C++", + ".lua": "Lua", + ".dart": "Dart", + ".swift": "Swift", + ".css": "CSS", + ".json": "JSON", + ".yml": "YAML", + ".xml": "XML", + ".pkl": "Pkl", +} + +func TestGetLanguageByAlias_UnsupportedLanguages(t *testing.T) { + for alias, name := range unsupportedByEnryAliasMap { + resName, _ := GetLanguageByNameOrAlias(alias) + require.Equal(t, name, resName, + "maybe a typo in `unsupportedByEnryAliasMap`?") + } +} + +func TestGetLanguageByAlias_NonAmbiguousLanguages(t *testing.T) { + for _, language := range nonAmbiguousExtensionsCheck { + _, ok := GetLanguageByNameOrAlias(language) + require.True(t, ok, + "unable to find language %s in go-enry", language) + } +} + +func TestGetLanguageExtensions_UnsupportedExtensions(t *testing.T) { + for language, exts := range unsupportedByEnryNameToExtensionMap { + extensions := GetLanguageExtensions(language) + for _, ext := range exts { + require.Contains(t, extensions, ext, + "maybe a typo in `unsupportedByEnryNameToExtensionMap`?") + } + } +} + +func TestGetLanguageExtensions_NonAmbiguousExtensions(t *testing.T) { + langMap := reverseMap(nonAmbiguousExtensionsCheck) + for language, exts := range langMap { + extensions := GetLanguageExtensions(language) + for _, ext := range exts { + require.Contains(t, extensions, ext, + "If this test fails when updating enry, maybe `overrideAmbiguousExtensionsMap` needs updating") + } + } +} + +func TestGetLanguagesByExtension_UnsupportedExtensions(t *testing.T) { + for ext, language := range unsupportedByEnryExtensionToNameMap { + filename := 
"foo" + ext + languages, _ := getLanguagesByExtension(filename) + require.Contains(t, languages, language, + "maybe a typo in `unsupportedByEnryExtensionToNameMap`?") + } +} + +func TestGetLanguagesByExtension_OverrideExtensions(t *testing.T) { + for ext, language := range overrideAmbiguousExtensionsMap { + filename := "foo" + ext + enryLangs := enry.GetLanguagesByExtension(filename, nil, nil) + + require.Contains(t, enryLangs, language, + "maybe a typo in `overrideAmbiguousExtensionsMap`?") + require.Greaterf(t, len(enryLangs), 1, + "extension %v is not ambiguous according to enry, remove it from `overrideAmbiguousExtensionsMap`", + ext) + } +} + +func TestGetLanguagesByExtension_NonAmbiguousExtensions(t *testing.T) { + for ext, language := range nonAmbiguousExtensionsCheck { + filename := "foo" + ext + languages, isLikelyBinaryFile := getLanguagesByExtension(filename) + require.False(t, isLikelyBinaryFile) + require.Equal(t, []string{language}, languages, + "If this test fails when updating enry, maybe `overrideAmbiguousExtensionsMap` needs updating") + } +} + +func TestGetLanguagesByExtension_BinaryExtensions(t *testing.T) { + for _, ext := range []string{".png", ".jpg", ".gif"} { + filename := "foo" + ext + _, isLikelyBinary := getLanguagesByExtension(filename) + require.Truef(t, isLikelyBinary, "filename: %v was not guessed to be binary;"+ + "bug in extension matching logic in getLanguagesByExtension maybe?", + filename) + } +} + +func TestExtensionsConsistency(t *testing.T) { + for ext, overrideLang := range overrideAmbiguousExtensionsMap { + filepath := "foo" + ext + enryLangsForExt := enry.GetLanguagesByExtension(filepath, nil, nil) + require.Containsf(t, enryLangsForExt, overrideLang, "overrideAmbiguousExtensionsMap maps extension %q to language %q but "+ + "that mapping is not present in enry's list %v", ext, overrideLang, enryLangsForExt) + require.Greaterf(t, len(enryLangsForExt), 1, "overrideAmbiguousExtensionsMap states that"+ + "%q extension is 
ambiguous, but only found langs: %v", ext, enryLangsForExt) + + candidates, isLikelyBinary := getLanguagesByExtension(filepath) + require.False(t, isLikelyBinary, "ambiguous files are all source code") + require.True(t, len(candidates) == 1, "getLanguagesByExtension should respect overrideAmbiguousExtensionsMap") + + shouldBeIgnoredLangsForExt := slices.DeleteFunc(enryLangsForExt, func(s string) bool { + return s == overrideLang + }) + for _, shouldBeIgnoredLang := range shouldBeIgnoredLangsForExt { + ignoredExts, found := nicheExtensionUsages[shouldBeIgnoredLang] + require.Truef(t, found, "expected lang: %q to have an entry in nicheExtensionUsages for consistency with GetLanguagesByExtension", shouldBeIgnoredLang) + require.Truef(t, len(ignoredExts) >= 1, "sets in nicheExtensionUsages must be non-empty") + + nonNicheExts := GetLanguageExtensions(shouldBeIgnoredLang) + for ignoredExt := range ignoredExts { + require.Falsef(t, slices.Contains(nonNicheExts, ignoredExt), + "GetLanguageExtensions should not return %q for lang %q for consistency with GetLanguagesByExtension", + ignoredExt, shouldBeIgnoredLang) + } + } + } +} + +func TestExtensionsConsistency2(t *testing.T) { + for lang := range enrydata.ExtensionsByLanguage { + for _, ext := range GetLanguageExtensions(lang) { + if strings.Count(ext, ".") > 1 { + // Ignore unusual edge cases like .coffee.md for Literate CoffeeScript + continue + } + langsByExt, isLikelyBinary := getLanguagesByExtension("foo" + ext) + if !isLikelyBinary { + require.Truef(t, slices.Contains(langsByExt, lang), + "expected getLanguagesByExtension result %v to contain %q (extension: %q)", langsByExt, lang, ext) + } + } + } +} + +func TestUnsupportedByEnry(t *testing.T) { + for lang := range unsupportedByEnryNameToExtensionMap { + enry_extensions, found := enrydata.ExtensionsByLanguage[lang] + if found { + validateLanguageAgainstGoEnry(t, "unsupportedByEnryNameToExtensionMap", enry_extensions, lang) + } + } + for _, lang := range 
unsupportedByEnryAliasMap { + enry_extensions, found := enrydata.ExtensionsByLanguage[lang] + if found { + validateLanguageAgainstGoEnry(t, "unsupportedByEnryAliasMap", enry_extensions, lang) + } + } + for _, lang := range unsupportedByEnryExtensionToNameMap { + enry_extensions, found := enrydata.ExtensionsByLanguage[lang] + if found { + validateLanguageAgainstGoEnry(t, "unsupportedByEnryExtensionToNameMap", enry_extensions, lang) + } + } +} + +func validateLanguageAgainstGoEnry(t *testing.T, name string, enryExtensions []string, lang string) { + enryExtensions = slices.Clone(enryExtensions) + slices.Sort(enryExtensions) + sgExtensions := slices.Clone(unsupportedByEnryNameToExtensionMap[lang]) + slices.Sort(sgExtensions) + + require.NotEqualf(t, enryExtensions, sgExtensions, "looks like language %q is supported by enry with the same extensions; remove it from %q", lang, name) +} diff --git a/languages/languages.go b/languages/languages.go new file mode 100644 index 000000000..1eb35aff0 --- /dev/null +++ b/languages/languages.go @@ -0,0 +1,151 @@ +// Package languages provides enhanced language detection capabilities on top of +// go-enry, with additional heuristics and mappings for better accuracy. +package languages + +import ( + "path/filepath" + "slices" + "strings" + + "github.com/go-enry/go-enry/v2" +) + +// Make sure all names are lowercase here, since they are normalized +var enryLanguageMappings = map[string]string{ + "c++": "cpp", + "c#": "c_sharp", +} + +// NormalizeLanguage converts the language name to lowercase and maps known +// aliases to their canonical names. 
+func NormalizeLanguage(filetype string) string { + normalized := strings.ToLower(filetype) + if mapped, ok := enryLanguageMappings[normalized]; ok { + normalized = mapped + } + + return normalized +} + +// GetLanguages is a replacement for enry.GetLanguages which +// avoids incorrect fallback behavior that is present in DefaultStrategies, +// where it will misclassify '.h' header files as C when file contents +// are not available. +// +// The content can be optionally passed via a callback instead of directly, so +// that in the common case, the caller can avoid fetching the content. The full +// content returned by getContent will be used for language detection. +// +// getContent is not called if the file is likely to be a binary file, +// as enry only covers programming languages. +// +// The buffer provided by the getContent callback is not modified. +// +// Returns: +// - An error if the getContent func returns an error +// - An empty slice if language detection failed +// - A single-element slice if the language was determined exactly +// - A multi-element slice if the language was ambiguous. For example, +// for simple `.h` files with just comments and macros, they may +// be valid C, C++ or any of their derivative languages (e.g. Objective-C). 
+func GetLanguages(path string, getContent func() ([]byte, error)) ([]string, error) { + impl := func() ([]string, error) { + langs := enry.GetLanguagesByFilename(path, nil, nil) + if len(langs) == 1 { + return langs, nil + } + newLangs, isLikelyBinaryFile := getLanguagesByExtension(path) + if isLikelyBinaryFile { + return nil, nil + } + switch len(newLangs) { + case 0: + break + case 1: + return newLangs, nil + default: + langs = newLangs + } + if getContent == nil { + return langs, nil + } + content, err := getContent() + if err != nil { + return nil, err + } + if len(content) == 0 { + return langs, nil + } + if enry.IsBinary(content) { + return nil, nil + } + + // enry doesn't expose a way to call GetLanguages with a specific set of + // strategies, so just hand-roll that code here. + var languages = langs + for _, strategy := range []enry.Strategy{enry.GetLanguagesByModeline, getLanguagesByShebang, getLanguagesByContent, enry.GetLanguagesByClassifier} { + candidates := strategy(path, content, languages) + switch len(candidates) { + case 0: + continue + case 1: + return candidates, nil + default: + languages = candidates + } + } + + return languages, nil + } + + langs, err := impl() + return slices.Clone(langs), err +} + +// GetLanguagesFromContent is a convenience wrapper around GetLanguages that +// allows passing the content directly instead of a callback. +func GetLanguagesFromContent(path string, content []byte) (langs []string) { + // We can ignore the error here, because the callback will never return an error + langs, _ = GetLanguages(path, func() ([]byte, error) { return content, nil }) + return +} + +// getLanguagesByContent is a wrapper for enry.GetLanguagesByContent. +// +// It applies additional heuristics for file extensions that need special handling. 
+func getLanguagesByContent(path string, content []byte, candidates []string) []string { + ext := strings.ToLower(filepath.Ext(path)) + if heuristic, ok := sgExtraContentHeuristics[ext]; ok { + return heuristic.Match(content) + } + return enry.GetLanguagesByContent(path, content, candidates) +} + +// getLanguagesByShebang is a replacement for enry.GetLanguagesByShebang. +// +// The enry function considers non-programming languages such as 'Pod'/'Pod 6' +// also for shebangs, so work around that. +func getLanguagesByShebang(path string, content []byte, candidates []string) []string { + languages := enry.GetLanguagesByShebang(path, content, candidates) + if len(languages) == 2 { + // See https://sourcegraph.com/github.com/go-enry/go-enry@40f2a1e5b90eec55c20441c2a5911dcfc298a447/-/blob/data/interpreter.go?L95-96 + if slices.Equal(languages, []string{"Perl", "Pod"}) { + return []string{"Perl"} + } + if slices.Equal(languages, []string{"Pod 6", "Raku"}) { + return []string{"Raku"} + } + } + return slices.Clone(languages) +} + +// IsLikelyVendoredFile returns true if the file is likely to be a vendored file. +// +// 1. This method is not 100% foolproof, as it relies on conventions +// around file paths which may or may not be followed. +// 2. The caller must not pass a directory path to this function +// for short-circuiting, as there is no guarantee that if a path +// p1 returns true, then Join(p1, p2) also returns true. 
+func IsLikelyVendoredFile(path string) bool { + return enry.IsVendor(path) +} diff --git a/languages/languages_test.go b/languages/languages_test.go new file mode 100644 index 000000000..bc790a064 --- /dev/null +++ b/languages/languages_test.go @@ -0,0 +1,184 @@ +package languages + +import ( + "testing" + + "github.com/go-enry/go-enry/v2" + "github.com/stretchr/testify/require" + "pgregory.net/rapid" +) + +var cppCapitalExtContent = `// Sample C++ file from the ROSE compiler project +// Original source: https://github.com/rose-compiler/rose +// This file is used for testing C++ language detection for files with .C extension +// Attribution: ROSE Compiler Team - Lawrence Livermore National Laboratory + +#include "sage3basic.h" +#include "rose_config.h" + +#include "SageTreeBuilder.h" +#include "Jovial_to_ROSE_translation.h" +#include "ModuleBuilder.h" + +#include +#include + +namespace Rose { +namespace builder { + +using namespace Rose::Diagnostics; + +namespace SB = SageBuilder; +namespace SI = SageInterface; +namespace LT = LanguageTranslation; +` + +func TestGetLanguages(t *testing.T) { + const matlabContent = "function [out] = square(x)\nout = x * x;\nend" + const mathematicaContent = "f[x_] := x ^ 2\ng[y_] := f[y]" + const cppContent = "namespace x { }" + const cContent = "typedef struct { int x; } Int;" + const emptyContent = "" + + testCases := []struct { + path string + content string + expectedLanguages []string + compareFirstOnly bool + }{ + {path: "perlscript", content: "#!/usr/bin/env perl\n$version = $ARGV[0];", expectedLanguages: []string{"Perl"}}, + {path: "rakuscript", content: "#!/usr/bin/env perl6\n$version = $ARGV[0];", expectedLanguages: []string{"Raku"}}, + {path: "ambiguous.h", content: emptyContent, expectedLanguages: []string{"C", "C++", "Objective-C"}}, + {path: "cpp.h", content: cppContent, expectedLanguages: []string{"C++"}}, + {path: "c.h", content: cContent, expectedLanguages: []string{"C"}}, + {path: "matlab.m", content: 
matlabContent, expectedLanguages: []string{"MATLAB"}, compareFirstOnly: true}, + {path: "mathematica.m", content: mathematicaContent, expectedLanguages: []string{"Mathematica"}, compareFirstOnly: true}, + { + path: "mathematica2.m", + content: ` +s := StringRiffle[{"a", "b", "c", "d", "e"}, ", "] +Flatten[{{a, b}, {c, {d}, e}, {f, {g, h}}}] +square[x_] := x ^ 2 +fourthpower[x_] := square[square[x]] +`, + expectedLanguages: []string{"Mathematica"}, + compareFirstOnly: true, + }, + {path: "SageTreeBuilder.C", content: cppCapitalExtContent, expectedLanguages: []string{"C++"}}, + // Ported cases from internal/languages TestGetLanguage + {path: "", content: emptyContent, expectedLanguages: nil}, + {path: "file.unknown", content: emptyContent, expectedLanguages: nil}, + {path: "file.go", content: "package main", expectedLanguages: []string{"Go"}}, + {path: "file.magik", content: emptyContent, expectedLanguages: []string{"Magik"}}, + {path: "file.apxc", content: emptyContent, expectedLanguages: []string{"Apex"}}, + // Check that we classify cls files by content and not just by extension + {path: "tex.cls", content: `\DeclareOption*{}`, expectedLanguages: []string{"TeX", "Apex", "ObjectScript", "Visual Basic 6.0", "OpenEdge ABL", "VBA"}}, + {path: "tex.cls", content: `public class HelloWorld {`, expectedLanguages: []string{"Apex", "Visual Basic 6.0", "TeX", "OpenEdge ABL", "ObjectScript", "VBA"}}, + } + + for _, testCase := range testCases { + var getContent func() ([]byte, error) + if testCase.content != "" { + getContent = func() ([]byte, error) { return []byte(testCase.content), nil } + } + gotLanguages, err := GetLanguages(testCase.path, getContent) + require.NoError(t, err) + if testCase.compareFirstOnly { + require.Equal(t, testCase.expectedLanguages, gotLanguages[0:1]) + continue + } + require.Equal(t, testCase.expectedLanguages, gotLanguages) + } + + rapid.Check(t, func(t *rapid.T) { + path := rapid.String().Draw(t, "path") + content := rapid.SliceOfN(rapid.Byte(), 
0, 100).Draw(t, "contents") + require.NotPanics(t, func() { + langs, err := GetLanguages(path, func() ([]byte, error) { return content, nil }) + require.NoError(t, err) + if len(langs) != 0 { + for _, l := range langs { + require.NotEqual(t, enry.OtherLanguage, l) + } + } + }) + }) + + rapid.Check(t, func(t *rapid.T) { + baseName := "abcd" + exts := []string{".h", ".m", ".unknown", ""} + extGens := []*rapid.Generator[string]{} + for _, ext := range exts { + extGens = append(extGens, rapid.Just(ext)) + } + extension := rapid.OneOf(extGens...).Draw(t, "extension") + path := baseName + extension + contentGens := []*rapid.Generator[string]{} + for _, content := range []string{cContent, cppContent, mathematicaContent, matlabContent, emptyContent} { + contentGens = append(contentGens, rapid.Just(content)) + } + content := rapid.OneOf(contentGens...).Draw(t, "content") + langs, err := GetLanguages(path, func() ([]byte, error) { + return []byte(content), nil + }) + require.NoError(t, err) + for _, lang := range langs { + require.NotEqual(t, enry.OtherLanguage, lang) + } + }) +} + +func TestGetLanguageByNameOrAlias(t *testing.T) { + tests := []struct { + name string + alias string + want string + wantOk bool + }{ + { + name: "empty alias", + alias: "", + want: "", + wantOk: false, + }, + { + name: "unknown alias", + alias: "unknown", + want: "", + wantOk: false, + }, + { + name: "supported alias", + alias: "go", + want: "Go", + wantOk: true, + }, + { + name: "unsupported by linguist alias", + alias: "magik", + want: "Magik", + wantOk: true, + }, + { + name: "unsupported by linguist alias normalized", + alias: "mAgIk", + want: "Magik", + wantOk: true, + }, + { + name: "apex example unsupported by linguist alias", + alias: "apex", + want: "Apex", + wantOk: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, ok := GetLanguageByNameOrAlias(tt.alias) + if got != tt.want || ok != tt.wantOk { + t.Errorf("GetLanguageByNameOrAlias(%q) = %q, %t, 
want %q, %t", tt.alias, got, ok, tt.want, tt.wantOk) + } + }) + } +} diff --git a/marshal.go b/marshal.go index 5e733a7b6..f03e20cd1 100644 --- a/marshal.go +++ b/marshal.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/binary" "fmt" + "slices" "unsafe" ) @@ -106,7 +107,7 @@ func reposMapDecode(b []byte) (ReposMap, error) { // don't own b, so we create a copy of it. r := binaryReader{ typ: "ReposMap", - b: append([]byte{}, b...), + b: slices.Clone(b), } // Version @@ -129,7 +130,7 @@ func reposMapDecode(b []byte) (ReposMap, error) { allBranchesLen := r.uvarint() allBranches := make([]RepositoryBranch, 0, allBranchesLen) - for i := 0; i < l; i++ { + for range l { repoID := r.uvarint() hasSymbols := r.byt() == 1 var indexTimeUnix int64 @@ -137,7 +138,7 @@ func reposMapDecode(b []byte) (ReposMap, error) { indexTimeUnix = int64(r.uvarint()) } lb := r.uvarint() - for i := 0; i < lb; i++ { + for range lb { allBranches = append(allBranches, RepositoryBranch{ Name: r.str(), Version: r.str(), diff --git a/marshal_test.go b/marshal_test.go index 9396769c1..a4a91b47b 100644 --- a/marshal_test.go +++ b/marshal_test.go @@ -74,7 +74,7 @@ func TestRepoList_Marshal(t *testing.T) { func genRepoList(size int) *RepoList { m := make(ReposMap, size) indexTime := time.Now().Unix() - for i := 0; i < size; i++ { + for i := range size { m[uint32(i)] = MinimalRepoListEntry{ HasSymbols: true, IndexTimeUnix: indexTime, diff --git a/build/builder_unix.go b/query/doc.go similarity index 69% rename from build/builder_unix.go rename to query/doc.go index edbd87b06..7a922e89a 100644 --- a/build/builder_unix.go +++ b/query/doc.go @@ -1,5 +1,3 @@ -// Copyright 2018 Google Inc. All rights reserved. -// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -11,19 +9,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. -// -//go:build !windows && !wasm -// +build !windows,!wasm - -package build - -import ( - "os" - - "golang.org/x/sys/unix" -) -func init() { - umask = os.FileMode(unix.Umask(0)) - unix.Umask(int(umask)) -} +// Package query contains the API for creating Zoekt queries. Queries can be +// constructed directly through query.Q objects, or by parsing a string using +// query.Parse +package query diff --git a/query/marshal.go b/query/marshal.go index 9f099b640..188a65e67 100644 --- a/query/marshal.go +++ b/query/marshal.go @@ -8,6 +8,8 @@ import ( "io" "unsafe" + "slices" + "github.com/RoaringBitmap/roaring" ) @@ -75,7 +77,7 @@ func branchesReposDecode(b []byte) ([]BranchRepos, error) { l := r.uvarint() // Length brs := make([]BranchRepos, l) - for i := 0; i < l; i++ { + for i := range l { brs[i].Branch = r.str() brs[i].Repos = r.bitmap() } @@ -157,7 +159,7 @@ func stringSetEncode(set map[string]struct{}) ([]byte, error) { func stringSetDecode(b []byte) (map[string]struct{}, error) { // binaryReader returns strings pointing into b to avoid allocations. We // don't own b, so we create a copy of it. 
- r := binaryReader{b: append([]byte{}, b...)} + r := binaryReader{b: slices.Clone(b)} // Version if v := r.byt(); v != 1 { @@ -168,7 +170,7 @@ func stringSetDecode(b []byte) (map[string]struct{}, error) { l := r.uvarint() set := make(map[string]struct{}, l) - for i := 0; i < l; i++ { + for range l { set[r.str()] = struct{}{} } diff --git a/query/marshal_test.go b/query/marshal_test.go index 142cfa1b5..0c7a5a646 100644 --- a/query/marshal_test.go +++ b/query/marshal_test.go @@ -163,21 +163,21 @@ func TestFileNameSet_Marshal(t *testing.T) { func genFileNameSet(size int) *FileNameSet { set := make(map[string]struct{}, size) - for i := 0; i < size; i++ { + for i := range size { set[genName(i)] = struct{}{} } return &FileNameSet{Set: set} } // Generating 5.5M repos slows down the benchmark setup time, so we cache things. -var genCache = map[string]interface{}{} +var genCache = map[string]any{} func genRepoBranches(n int) map[string][]string { repoBranches := map[string][]string{} orgIndex := 0 repoIndex := 0 - for i := 0; i < n; i++ { + for i := range n { org := genName(orgIndex) name := "github.com/" + org + "/" + genName(orgIndex*2+repoIndex) repoBranches[name] = []string{"HEAD"} diff --git a/query/parse.go b/query/parse.go index a9ffd23e8..165528663 100644 --- a/query/parse.go +++ b/query/parse.go @@ -22,7 +22,8 @@ import ( "strings" "github.com/grafana/regexp" - "github.com/sourcegraph/zoekt/internal/languages" + + "github.com/sourcegraph/zoekt/languages" ) var _ = log.Printf @@ -68,6 +69,43 @@ func (o *orOperator) String() string { return "orOp" } +// caseScopeQ is a parse-time wrapper used to prevent case directives from an +// outer expression list from overriding an explicitly scoped inner `case:`. 
+type caseScopeQ struct { + Child Q +} + +func (c *caseScopeQ) String() string { + return c.Child.String() +} + +func stripCaseScopesList(qs []Q) []Q { + stripped := make([]Q, len(qs)) + for i, q := range qs { + stripped[i] = stripCaseScopes(q) + } + return stripped +} + +func stripCaseScopes(q Q) Q { + switch s := q.(type) { + case *And: + return &And{Children: stripCaseScopesList(s.Children)} + case *Or: + return &Or{Children: stripCaseScopesList(s.Children)} + case *Not: + return &Not{Child: stripCaseScopes(s.Child)} + case *Type: + return &Type{Type: s.Type, Child: stripCaseScopes(s.Child)} + case *Boost: + return &Boost{Boost: s.Boost, Child: stripCaseScopes(s.Child)} + case *caseScopeQ: + return stripCaseScopes(s.Child) + default: + return q + } +} + func isSpace(c byte) bool { return c == ' ' || c == '\t' } @@ -76,16 +114,22 @@ func isSpace(c byte) bool { func Parse(qStr string) (Q, error) { b := []byte(qStr) - qs, _, err := parseExprList(b) + qs, n, err := parseExprList(b) if err != nil { return nil, err } + if n != len(b) { + return nil, fmt.Errorf("query: extra tokens found at end input: %q", b[n:]) + } + q, err := parseOperators(qs) if err != nil { return nil, err } + q = stripCaseScopes(q) + return Simplify(q), nil } @@ -173,7 +217,7 @@ func parseExpr(in []byte) (Q, int, error) { } expr = q case tokLang: - canonical, ok := languages.GetLanguageByAlias(text) + canonical, ok := languages.GetLanguageByNameOrAlias(text) if !ok { expr = &Const{false} } else { @@ -252,6 +296,22 @@ func parseExpr(in []byte) (Q, int, error) { } expr = &RepoSet{Set: set} + case tokMeta: + // Split on ':' to separate field and value + parts := bytes.SplitN([]byte(text), []byte(":"), 2) + if len(parts) != 2 { + return nil, 0, fmt.Errorf("query: invalid meta field syntax %q", text) + } + field := string(parts[0]) + valuePattern := string(parts[1]) + re, err := regexp.Compile(valuePattern) + if err != nil { + return nil, 0, fmt.Errorf("query: invalid regexp in meta value: %v", err) 
+ } + expr = &Meta{ + Field: field, + Value: re, + } } return expr, len(in) - len(b), nil @@ -346,12 +406,14 @@ func parseExprList(in []byte) ([]Q, int, error) { } setCase := "auto" + hasCaseScope := false newQS := qs[:0] typeT := uint8(100) for _, q := range qs { switch s := q.(type) { case *caseQ: setCase = s.Flavor + hasCaseScope = true case *Type: if s.Type < typeT { typeT = s.Type @@ -367,8 +429,25 @@ func parseExprList(in []byte) ([]Q, int, error) { return q }) if typeT != 100 { - qs = []Q{&Type{Type: typeT, Child: NewAnd(qs...)}} + typedQ, err := parseOperators(qs) + if err != nil { + return nil, 0, err + } + qs = []Q{&Type{Type: typeT, Child: typedQ}} + } + + if hasCaseScope { + scoped := make([]Q, 0, len(qs)) + for _, q := range qs { + if _, isOrOperator := q.(*orOperator); isOrOperator { + scoped = append(scoped, q) + continue + } + scoped = append(scoped, &caseScopeQ{Child: q}) + } + qs = scoped } + return qs, len(in) - len(b), nil } @@ -406,6 +485,7 @@ const ( tokPublic = 16 tokFork = 17 tokRepoSet = 18 + tokMeta = 19 ) var tokNames = map[int]string{ @@ -427,6 +507,7 @@ var tokNames = map[int]string{ tokSym: "Symbol", tokType: "Type", tokRepoSet: "RepoSet", + tokMeta: "Meta", } var prefixes = map[string]int{ @@ -448,6 +529,7 @@ var prefixes = map[string]int{ "t:": tokType, "type:": tokType, "reposet:": tokRepoSet, + "meta.": tokMeta, } var reservedWords = map[string]int{ diff --git a/query/parse_test.go b/query/parse_test.go index 233a1545d..2b71ccca3 100644 --- a/query/parse_test.go +++ b/query/parse_test.go @@ -108,10 +108,31 @@ func TestParseQuery(t *testing.T) { &Substring{Pattern: "abc", CaseSensitive: true}, &Not{Child: &Substring{Pattern: "def", FileName: true, CaseSensitive: true}}, )}, + {"(foo case:yes) bar", NewAnd( + &Substring{Pattern: "foo", CaseSensitive: true}, + &Substring{Pattern: "bar"}, + )}, + {"(case:yes foo) bar", NewAnd( + &Substring{Pattern: "foo", CaseSensitive: true}, + &Substring{Pattern: "bar"}, + )}, + {"(case:yes foo 
(bar))", NewAnd( + &Substring{Pattern: "foo", CaseSensitive: true}, + &Substring{Pattern: "bar", CaseSensitive: true}, + )}, + {"case:auto (foo case:yes) bar", NewAnd( + &Substring{Pattern: "foo", CaseSensitive: true}, + &Substring{Pattern: "bar"}, + )}, + {"case:yes (foo case:no) bar", NewAnd( + &Substring{Pattern: "foo"}, + &Substring{Pattern: "bar", CaseSensitive: true}, + )}, // type {"type:repo abc", &Type{Type: TypeRepo, Child: &Substring{Pattern: "abc"}}}, {"type:file abc def", &Type{Type: TypeFileName, Child: NewAnd(&Substring{Pattern: "abc"}, &Substring{Pattern: "def"})}}, + {"type:repo foo or bar", &Type{Type: TypeRepo, Child: NewOr(&Substring{Pattern: "foo"}, &Substring{Pattern: "bar"})}}, {"(type:repo abc) def", NewAnd(&Type{Type: TypeRepo, Child: &Substring{Pattern: "abc"}}, &Substring{Pattern: "def"})}, // errors. @@ -124,8 +145,30 @@ func TestParseQuery(t *testing.T) { {"abc or", nil}, {"or abc", nil}, {"def or or abc", nil}, + {"type:repo or", nil}, + {"or type:repo", nil}, + + // unbalanced parentheses + {"(", nil}, + {"((", nil}, + {"(((", nil}, + {")", nil}, + {"))", nil}, + {")))", nil}, + {"foo)", nil}, + {"foo))", nil}, + {"foo)))", nil}, + {"(foo", nil}, + {"((foo", nil}, + {"(((foo", nil}, + {"(foo))", nil}, + {"(((foo))", nil}, {"", &Const{Value: true}}, + + // whitespace + {" ( ) ", &Const{Value: true}}, + {" ( foo ) ", &Substring{Pattern: "foo"}}, } { got, err := Parse(c.in) if (c.want == nil) != (err != nil) { @@ -179,3 +222,72 @@ func TestTokenize(t *testing.T) { } } } + +func TestMetaQueryParsing(t *testing.T) { + cases := []struct { + input string + field string + pattern string + err bool + }{ + { + input: "meta.visibility_level:20", + field: "visibility_level", + pattern: "20", + err: false, + }, + { + input: "meta.needle:ha.*stack", + field: "needle", + pattern: "ha.*stack", + err: false, + }, + { + input: "meta.public:true", + field: "public", + pattern: "true", + err: false, + }, + { + input: "meta.language:go", + field: 
"language", + pattern: "go", + err: false, + }, + { + input: "meta.invalid_field:(", + field: "invalid_field", + pattern: "(", + err: true, + }, + } + + for _, c := range cases { + t.Run(c.input, func(t *testing.T) { + q, err := Parse(c.input) + if c.err { + if err == nil { + t.Errorf("expected error, got nil") + } + return + } + + if err != nil { + t.Errorf("unexpected error: %v", err) + } + + meta, ok := q.(*Meta) + if !ok || meta == nil { + t.Errorf("expected *Meta, got %T", q) + return + } + + if meta.Field != c.field { + t.Errorf("expected field %q, got %q", c.field, meta.Field) + } + if meta.Value == nil || meta.Value.String() != c.pattern { + t.Errorf("expected pattern %q, got %v", c.pattern, meta.Value) + } + }) + } +} diff --git a/query/query.go b/query/query.go index a58f48cf4..c77499bd7 100644 --- a/query/query.go +++ b/query/query.go @@ -26,6 +26,7 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/grafana/regexp" + "github.com/sourcegraph/zoekt/internal/syntaxutil" ) @@ -508,6 +509,17 @@ func (q *Branch) String() string { return fmt.Sprintf("branch:%q", q.Pattern) } +// Meta represents a query for metadata fields. +type Meta struct { + Field string // The metadata field name + Value *regexp.Regexp // The value to match +} + +// String returns a string representation of the Meta query. 
+func (m *Meta) String() string { + return fmt.Sprintf("meta.%s:%s", m.Field, m.Value) +} + func queryChildren(q Q) []Q { switch s := q.(type) { case *And: diff --git a/query/query_proto.go b/query/query_proto.go index 83116d812..08f2438ae 100644 --- a/query/query_proto.go +++ b/query/query_proto.go @@ -7,47 +7,47 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/grafana/regexp" - proto "github.com/sourcegraph/zoekt/grpc/protos/zoekt/webserver/v1" + webserverv1 "github.com/sourcegraph/zoekt/grpc/protos/zoekt/webserver/v1" ) -func QToProto(q Q) *proto.Q { +func QToProto(q Q) *webserverv1.Q { switch v := q.(type) { case RawConfig: - return &proto.Q{Query: &proto.Q_RawConfig{RawConfig: v.ToProto()}} + return &webserverv1.Q{Query: &webserverv1.Q_RawConfig{RawConfig: v.ToProto()}} case *Regexp: - return &proto.Q{Query: &proto.Q_Regexp{Regexp: v.ToProto()}} + return &webserverv1.Q{Query: &webserverv1.Q_Regexp{Regexp: v.ToProto()}} case *Symbol: - return &proto.Q{Query: &proto.Q_Symbol{Symbol: v.ToProto()}} + return &webserverv1.Q{Query: &webserverv1.Q_Symbol{Symbol: v.ToProto()}} case *Language: - return &proto.Q{Query: &proto.Q_Language{Language: v.ToProto()}} + return &webserverv1.Q{Query: &webserverv1.Q_Language{Language: v.ToProto()}} case *Const: - return &proto.Q{Query: &proto.Q_Const{Const: v.Value}} + return &webserverv1.Q{Query: &webserverv1.Q_Const{Const: v.Value}} case *Repo: - return &proto.Q{Query: &proto.Q_Repo{Repo: v.ToProto()}} + return &webserverv1.Q{Query: &webserverv1.Q_Repo{Repo: v.ToProto()}} case *RepoRegexp: - return &proto.Q{Query: &proto.Q_RepoRegexp{RepoRegexp: v.ToProto()}} + return &webserverv1.Q{Query: &webserverv1.Q_RepoRegexp{RepoRegexp: v.ToProto()}} case *BranchesRepos: - return &proto.Q{Query: &proto.Q_BranchesRepos{BranchesRepos: v.ToProto()}} + return &webserverv1.Q{Query: &webserverv1.Q_BranchesRepos{BranchesRepos: v.ToProto()}} case *RepoIDs: - return &proto.Q{Query: &proto.Q_RepoIds{RepoIds: v.ToProto()}} + return 
&webserverv1.Q{Query: &webserverv1.Q_RepoIds{RepoIds: v.ToProto()}} case *RepoSet: - return &proto.Q{Query: &proto.Q_RepoSet{RepoSet: v.ToProto()}} + return &webserverv1.Q{Query: &webserverv1.Q_RepoSet{RepoSet: v.ToProto()}} case *FileNameSet: - return &proto.Q{Query: &proto.Q_FileNameSet{FileNameSet: v.ToProto()}} + return &webserverv1.Q{Query: &webserverv1.Q_FileNameSet{FileNameSet: v.ToProto()}} case *Type: - return &proto.Q{Query: &proto.Q_Type{Type: v.ToProto()}} + return &webserverv1.Q{Query: &webserverv1.Q_Type{Type: v.ToProto()}} case *Substring: - return &proto.Q{Query: &proto.Q_Substring{Substring: v.ToProto()}} + return &webserverv1.Q{Query: &webserverv1.Q_Substring{Substring: v.ToProto()}} case *And: - return &proto.Q{Query: &proto.Q_And{And: v.ToProto()}} + return &webserverv1.Q{Query: &webserverv1.Q_And{And: v.ToProto()}} case *Or: - return &proto.Q{Query: &proto.Q_Or{Or: v.ToProto()}} + return &webserverv1.Q{Query: &webserverv1.Q_Or{Or: v.ToProto()}} case *Not: - return &proto.Q{Query: &proto.Q_Not{Not: v.ToProto()}} + return &webserverv1.Q{Query: &webserverv1.Q_Not{Not: v.ToProto()}} case *Branch: - return &proto.Q{Query: &proto.Q_Branch{Branch: v.ToProto()}} + return &webserverv1.Q{Query: &webserverv1.Q_Branch{Branch: v.ToProto()}} case *Boost: - return &proto.Q{Query: &proto.Q_Boost{Boost: v.ToProto()}} + return &webserverv1.Q{Query: &webserverv1.Q_Boost{Boost: v.ToProto()}} default: // The following nodes do not have a proto representation: // - caseQ: only used internally, not by the RPC layer @@ -55,50 +55,52 @@ func QToProto(q Q) *proto.Q { } } -func QFromProto(p *proto.Q) (Q, error) { +func QFromProto(p *webserverv1.Q) (Q, error) { switch v := p.Query.(type) { - case *proto.Q_RawConfig: + case *webserverv1.Q_RawConfig: return RawConfigFromProto(v.RawConfig), nil - case *proto.Q_Regexp: + case *webserverv1.Q_Regexp: return RegexpFromProto(v.Regexp) - case *proto.Q_Symbol: + case *webserverv1.Q_Symbol: return SymbolFromProto(v.Symbol) - case 
*proto.Q_Language: + case *webserverv1.Q_Language: return LanguageFromProto(v.Language), nil - case *proto.Q_Const: + case *webserverv1.Q_Const: return &Const{Value: v.Const}, nil - case *proto.Q_Repo: + case *webserverv1.Q_Repo: return RepoFromProto(v.Repo) - case *proto.Q_RepoRegexp: + case *webserverv1.Q_RepoRegexp: return RepoRegexpFromProto(v.RepoRegexp) - case *proto.Q_BranchesRepos: + case *webserverv1.Q_BranchesRepos: return BranchesReposFromProto(v.BranchesRepos) - case *proto.Q_RepoIds: + case *webserverv1.Q_RepoIds: return RepoIDsFromProto(v.RepoIds) - case *proto.Q_RepoSet: + case *webserverv1.Q_RepoSet: return RepoSetFromProto(v.RepoSet), nil - case *proto.Q_FileNameSet: + case *webserverv1.Q_FileNameSet: return FileNameSetFromProto(v.FileNameSet), nil - case *proto.Q_Type: + case *webserverv1.Q_Type: return TypeFromProto(v.Type) - case *proto.Q_Substring: + case *webserverv1.Q_Substring: return SubstringFromProto(v.Substring), nil - case *proto.Q_And: + case *webserverv1.Q_And: return AndFromProto(v.And) - case *proto.Q_Or: + case *webserverv1.Q_Or: return OrFromProto(v.Or) - case *proto.Q_Not: + case *webserverv1.Q_Not: return NotFromProto(v.Not) - case *proto.Q_Branch: + case *webserverv1.Q_Branch: return BranchFromProto(v.Branch), nil - case *proto.Q_Boost: + case *webserverv1.Q_Boost: return BoostFromProto(v.Boost) + case *webserverv1.Q_Meta: + return MetaFromProto(v.Meta) default: panic(fmt.Sprintf("unknown query node %T", p.Query)) } } -func RegexpFromProto(p *proto.Regexp) (*Regexp, error) { +func RegexpFromProto(p *webserverv1.Regexp) (*Regexp, error) { parsed, err := syntax.Parse(p.GetRegexp(), regexpFlags) if err != nil { return nil, err @@ -111,8 +113,8 @@ func RegexpFromProto(p *proto.Regexp) (*Regexp, error) { }, nil } -func (r *Regexp) ToProto() *proto.Regexp { - return &proto.Regexp{ +func (r *Regexp) ToProto() *webserverv1.Regexp { + return &webserverv1.Regexp{ Regexp: r.Regexp.String(), FileName: r.FileName, Content: r.Content, @@ 
-120,7 +122,7 @@ func (r *Regexp) ToProto() *proto.Regexp { } } -func SymbolFromProto(p *proto.Symbol) (*Symbol, error) { +func SymbolFromProto(p *webserverv1.Symbol) (*Symbol, error) { expr, err := QFromProto(p.GetExpr()) if err != nil { return nil, err @@ -131,23 +133,23 @@ func SymbolFromProto(p *proto.Symbol) (*Symbol, error) { }, nil } -func (s *Symbol) ToProto() *proto.Symbol { - return &proto.Symbol{ +func (s *Symbol) ToProto() *webserverv1.Symbol { + return &webserverv1.Symbol{ Expr: QToProto(s.Expr), } } -func LanguageFromProto(p *proto.Language) *Language { +func LanguageFromProto(p *webserverv1.Language) *Language { return &Language{ Language: p.GetLanguage(), } } -func (l *Language) ToProto() *proto.Language { - return &proto.Language{Language: l.Language} +func (l *Language) ToProto() *webserverv1.Language { + return &webserverv1.Language{Language: l.Language} } -func RepoFromProto(p *proto.Repo) (*Repo, error) { +func RepoFromProto(p *webserverv1.Repo) (*Repo, error) { r, err := regexp.Compile(p.GetRegexp()) if err != nil { return nil, err @@ -157,13 +159,13 @@ func RepoFromProto(p *proto.Repo) (*Repo, error) { }, nil } -func (q *Repo) ToProto() *proto.Repo { - return &proto.Repo{ +func (q *Repo) ToProto() *webserverv1.Repo { + return &webserverv1.Repo{ Regexp: q.Regexp.String(), } } -func RepoRegexpFromProto(p *proto.RepoRegexp) (*RepoRegexp, error) { +func RepoRegexpFromProto(p *webserverv1.RepoRegexp) (*RepoRegexp, error) { r, err := regexp.Compile(p.GetRegexp()) if err != nil { return nil, err @@ -173,13 +175,13 @@ func RepoRegexpFromProto(p *proto.RepoRegexp) (*RepoRegexp, error) { }, nil } -func (q *RepoRegexp) ToProto() *proto.RepoRegexp { - return &proto.RepoRegexp{ +func (q *RepoRegexp) ToProto() *webserverv1.RepoRegexp { + return &webserverv1.RepoRegexp{ Regexp: q.Regexp.String(), } } -func BranchesReposFromProto(p *proto.BranchesRepos) (*BranchesRepos, error) { +func BranchesReposFromProto(p *webserverv1.BranchesRepos) (*BranchesRepos, 
error) { brs := make([]BranchRepos, len(p.GetList())) for i, br := range p.GetList() { branchRepos, err := BranchReposFromProto(br) @@ -193,18 +195,18 @@ func BranchesReposFromProto(p *proto.BranchesRepos) (*BranchesRepos, error) { }, nil } -func (br *BranchesRepos) ToProto() *proto.BranchesRepos { - list := make([]*proto.BranchRepos, len(br.List)) +func (br *BranchesRepos) ToProto() *webserverv1.BranchesRepos { + list := make([]*webserverv1.BranchRepos, len(br.List)) for i, branchRepo := range br.List { list[i] = branchRepo.ToProto() } - return &proto.BranchesRepos{ + return &webserverv1.BranchesRepos{ List: list, } } -func RepoIDsFromProto(p *proto.RepoIds) (*RepoIDs, error) { +func RepoIDsFromProto(p *webserverv1.RepoIds) (*RepoIDs, error) { bm := roaring.NewBitmap() err := bm.UnmarshalBinary(p.GetRepos()) if err != nil { @@ -216,17 +218,17 @@ func RepoIDsFromProto(p *proto.RepoIds) (*RepoIDs, error) { }, nil } -func (q *RepoIDs) ToProto() *proto.RepoIds { +func (q *RepoIDs) ToProto() *webserverv1.RepoIds { b, err := q.Repos.ToBytes() if err != nil { panic("unexpected error marshalling bitmap: " + err.Error()) } - return &proto.RepoIds{ + return &webserverv1.RepoIds{ Repos: b, } } -func BranchReposFromProto(p *proto.BranchRepos) (BranchRepos, error) { +func BranchReposFromProto(p *webserverv1.BranchRepos) (BranchRepos, error) { bm := roaring.NewBitmap() err := bm.UnmarshalBinary(p.GetRepos()) if err != nil { @@ -238,31 +240,31 @@ func BranchReposFromProto(p *proto.BranchRepos) (BranchRepos, error) { }, nil } -func (br *BranchRepos) ToProto() *proto.BranchRepos { +func (br *BranchRepos) ToProto() *webserverv1.BranchRepos { b, err := br.Repos.ToBytes() if err != nil { panic("unexpected error marshalling bitmap: " + err.Error()) } - return &proto.BranchRepos{ + return &webserverv1.BranchRepos{ Branch: br.Branch, Repos: b, } } -func RepoSetFromProto(p *proto.RepoSet) *RepoSet { +func RepoSetFromProto(p *webserverv1.RepoSet) *RepoSet { return &RepoSet{ Set: 
p.GetSet(), } } -func (q *RepoSet) ToProto() *proto.RepoSet { - return &proto.RepoSet{ +func (q *RepoSet) ToProto() *webserverv1.RepoSet { + return &webserverv1.RepoSet{ Set: q.Set, } } -func FileNameSetFromProto(p *proto.FileNameSet) *FileNameSet { +func FileNameSetFromProto(p *webserverv1.FileNameSet) *FileNameSet { m := make(map[string]struct{}, len(p.GetSet())) for _, name := range p.GetSet() { m[name] = struct{}{} @@ -272,17 +274,17 @@ func FileNameSetFromProto(p *proto.FileNameSet) *FileNameSet { } } -func (q *FileNameSet) ToProto() *proto.FileNameSet { +func (q *FileNameSet) ToProto() *webserverv1.FileNameSet { s := make([]string, 0, len(q.Set)) for name := range q.Set { s = append(s, name) } - return &proto.FileNameSet{ + return &webserverv1.FileNameSet{ Set: s, } } -func TypeFromProto(p *proto.Type) (*Type, error) { +func TypeFromProto(p *webserverv1.Type) (*Type, error) { child, err := QFromProto(p.GetChild()) if err != nil { return nil, err @@ -290,11 +292,11 @@ func TypeFromProto(p *proto.Type) (*Type, error) { var kind uint8 switch p.GetType() { - case proto.Type_KIND_FILE_MATCH: + case webserverv1.Type_KIND_FILE_MATCH: kind = TypeFileMatch - case proto.Type_KIND_FILE_NAME: + case webserverv1.Type_KIND_FILE_NAME: kind = TypeFileName - case proto.Type_KIND_REPO: + case webserverv1.Type_KIND_REPO: kind = TypeRepo } @@ -305,24 +307,24 @@ func TypeFromProto(p *proto.Type) (*Type, error) { }, nil } -func (q *Type) ToProto() *proto.Type { - var kind proto.Type_Kind +func (q *Type) ToProto() *webserverv1.Type { + var kind webserverv1.Type_Kind switch q.Type { case TypeFileMatch: - kind = proto.Type_KIND_FILE_MATCH + kind = webserverv1.Type_KIND_FILE_MATCH case TypeFileName: - kind = proto.Type_KIND_FILE_NAME + kind = webserverv1.Type_KIND_FILE_NAME case TypeRepo: - kind = proto.Type_KIND_REPO + kind = webserverv1.Type_KIND_REPO } - return &proto.Type{ + return &webserverv1.Type{ Child: QToProto(q.Child), Type: kind, } } -func SubstringFromProto(p 
*proto.Substring) *Substring { +func SubstringFromProto(p *webserverv1.Substring) *Substring { return &Substring{ Pattern: p.GetPattern(), CaseSensitive: p.GetCaseSensitive(), @@ -331,8 +333,8 @@ func SubstringFromProto(p *proto.Substring) *Substring { } } -func (q *Substring) ToProto() *proto.Substring { - return &proto.Substring{ +func (q *Substring) ToProto() *webserverv1.Substring { + return &webserverv1.Substring{ Pattern: q.Pattern, CaseSensitive: q.CaseSensitive, FileName: q.FileName, @@ -340,7 +342,7 @@ func (q *Substring) ToProto() *proto.Substring { } } -func OrFromProto(p *proto.Or) (*Or, error) { +func OrFromProto(p *webserverv1.Or) (*Or, error) { children := make([]Q, len(p.GetChildren())) for i, child := range p.GetChildren() { c, err := QFromProto(child) @@ -354,17 +356,17 @@ func OrFromProto(p *proto.Or) (*Or, error) { }, nil } -func (q *Or) ToProto() *proto.Or { - children := make([]*proto.Q, len(q.Children)) +func (q *Or) ToProto() *webserverv1.Or { + children := make([]*webserverv1.Q, len(q.Children)) for i, child := range q.Children { children[i] = QToProto(child) } - return &proto.Or{ + return &webserverv1.Or{ Children: children, } } -func BoostFromProto(p *proto.Boost) (*Boost, error) { +func BoostFromProto(p *webserverv1.Boost) (*Boost, error) { child, err := QFromProto(p.GetChild()) if err != nil { return nil, err @@ -375,14 +377,25 @@ func BoostFromProto(p *proto.Boost) (*Boost, error) { }, nil } -func (q *Boost) ToProto() *proto.Boost { - return &proto.Boost{ +func MetaFromProto(p *webserverv1.Meta) (*Meta, error) { + re, err := regexp.Compile(p.GetValue()) + if err != nil { + return nil, fmt.Errorf("invalid regexp in Meta.Value: %w", err) + } + return &Meta{ + Field: p.GetKey(), + Value: re, + }, nil +} + +func (q *Boost) ToProto() *webserverv1.Boost { + return &webserverv1.Boost{ Child: QToProto(q.Child), Boost: q.Boost, } } -func NotFromProto(p *proto.Not) (*Not, error) { +func NotFromProto(p *webserverv1.Not) (*Not, error) { child, err 
:= QFromProto(p.GetChild()) if err != nil { return nil, err @@ -392,13 +405,13 @@ func NotFromProto(p *proto.Not) (*Not, error) { }, nil } -func (q *Not) ToProto() *proto.Not { - return &proto.Not{ +func (q *Not) ToProto() *webserverv1.Not { + return &webserverv1.Not{ Child: QToProto(q.Child), } } -func AndFromProto(p *proto.And) (*And, error) { +func AndFromProto(p *webserverv1.And) (*And, error) { children := make([]Q, len(p.GetChildren())) for i, child := range p.GetChildren() { c, err := QFromProto(child) @@ -412,69 +425,69 @@ func AndFromProto(p *proto.And) (*And, error) { }, nil } -func (q *And) ToProto() *proto.And { - children := make([]*proto.Q, len(q.Children)) +func (q *And) ToProto() *webserverv1.And { + children := make([]*webserverv1.Q, len(q.Children)) for i, child := range q.Children { children[i] = QToProto(child) } - return &proto.And{ + return &webserverv1.And{ Children: children, } } -func BranchFromProto(p *proto.Branch) *Branch { +func BranchFromProto(p *webserverv1.Branch) *Branch { return &Branch{ Pattern: p.GetPattern(), Exact: p.GetExact(), } } -func (q *Branch) ToProto() *proto.Branch { - return &proto.Branch{ +func (q *Branch) ToProto() *webserverv1.Branch { + return &webserverv1.Branch{ Pattern: q.Pattern, Exact: q.Exact, } } -func RawConfigFromProto(p *proto.RawConfig) (res RawConfig) { +func RawConfigFromProto(p *webserverv1.RawConfig) (res RawConfig) { for _, protoFlag := range p.Flags { switch protoFlag { - case proto.RawConfig_FLAG_ONLY_PUBLIC: + case webserverv1.RawConfig_FLAG_ONLY_PUBLIC: res |= RcOnlyPublic - case proto.RawConfig_FLAG_ONLY_PRIVATE: + case webserverv1.RawConfig_FLAG_ONLY_PRIVATE: res |= RcOnlyPrivate - case proto.RawConfig_FLAG_ONLY_FORKS: + case webserverv1.RawConfig_FLAG_ONLY_FORKS: res |= RcOnlyForks - case proto.RawConfig_FLAG_NO_FORKS: + case webserverv1.RawConfig_FLAG_NO_FORKS: res |= RcNoForks - case proto.RawConfig_FLAG_ONLY_ARCHIVED: + case webserverv1.RawConfig_FLAG_ONLY_ARCHIVED: res |= RcOnlyArchived 
- case proto.RawConfig_FLAG_NO_ARCHIVED: + case webserverv1.RawConfig_FLAG_NO_ARCHIVED: res |= RcNoArchived } } return res } -func (r RawConfig) ToProto() *proto.RawConfig { - var flags []proto.RawConfig_Flag +func (r RawConfig) ToProto() *webserverv1.RawConfig { + var flags []webserverv1.RawConfig_Flag for _, flag := range flagNames { if r&flag.Mask != 0 { switch flag.Mask { case RcOnlyPublic: - flags = append(flags, proto.RawConfig_FLAG_ONLY_PUBLIC) + flags = append(flags, webserverv1.RawConfig_FLAG_ONLY_PUBLIC) case RcOnlyPrivate: - flags = append(flags, proto.RawConfig_FLAG_ONLY_PRIVATE) + flags = append(flags, webserverv1.RawConfig_FLAG_ONLY_PRIVATE) case RcOnlyForks: - flags = append(flags, proto.RawConfig_FLAG_ONLY_FORKS) + flags = append(flags, webserverv1.RawConfig_FLAG_ONLY_FORKS) case RcNoForks: - flags = append(flags, proto.RawConfig_FLAG_NO_FORKS) + flags = append(flags, webserverv1.RawConfig_FLAG_NO_FORKS) case RcOnlyArchived: - flags = append(flags, proto.RawConfig_FLAG_ONLY_ARCHIVED) + flags = append(flags, webserverv1.RawConfig_FLAG_ONLY_ARCHIVED) case RcNoArchived: - flags = append(flags, proto.RawConfig_FLAG_NO_ARCHIVED) + flags = append(flags, webserverv1.RawConfig_FLAG_NO_ARCHIVED) } } } - return &proto.RawConfig{Flags: flags} + return &webserverv1.RawConfig{Flags: flags} } diff --git a/query/regexp.go b/query/regexp.go index 889842fdd..f2058ceb4 100644 --- a/query/regexp.go +++ b/query/regexp.go @@ -18,6 +18,8 @@ import ( "log" "regexp/syntax" + "slices" + "github.com/sourcegraph/zoekt/internal/syntaxutil" ) @@ -81,13 +83,7 @@ func hasCapture(r *syntax.Regexp) bool { return true } - for _, s := range r.Sub { - if hasCapture(s) { - return true - } - } - - return false + return slices.ContainsFunc(r.Sub, hasCapture) } func uncapture(r *syntax.Regexp) *syntax.Regexp { diff --git a/score.go b/score.go deleted file mode 100644 index 09faad837..000000000 --- a/score.go +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright 2016 Google Inc. 
All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package zoekt - -import ( - "fmt" - "math" - "strconv" - "strings" -) - -const ( - maxUInt16 = 0xffff - ScoreOffset = 10_000_000 -) - -// addScore increments the score of the FileMatch by the computed score. If -// debugScore is true, it also adds a debug string to the FileMatch. If raw is -// -1, it is ignored. Otherwise, it is added to the debug string. -func (m *FileMatch) addScore(what string, computed float64, raw float64, debugScore bool) { - if computed != 0 && debugScore { - var b strings.Builder - fmt.Fprintf(&b, "%s", what) - if raw != -1 { - fmt.Fprintf(&b, "(%s)", strconv.FormatFloat(raw, 'f', -1, 64)) - } - fmt.Fprintf(&b, ":%.2f, ", computed) - m.Debug += b.String() - } - m.Score += computed -} - -// scoreFile computes a score for the file match using various scoring signals, like -// whether there's an exact match on a symbol, the number of query clauses that matched, etc. -func (d *indexData) scoreFile(fileMatch *FileMatch, doc uint32, mt matchTree, known map[matchTree]bool, opts *SearchOptions) { - atomMatchCount := 0 - visitMatchAtoms(mt, known, func(mt matchTree) { - atomMatchCount++ - }) - - addScore := func(what string, computed float64) { - fileMatch.addScore(what, computed, -1, opts.DebugScore) - } - - // atom-count boosts files with matches from more than 1 atom. The - // maximum boost is scoreFactorAtomMatch. 
- if atomMatchCount > 0 { - fileMatch.addScore("atom", (1.0-1.0/float64(atomMatchCount))*scoreFactorAtomMatch, float64(atomMatchCount), opts.DebugScore) - } - - maxFileScore := 0.0 - for i := range fileMatch.LineMatches { - if maxFileScore < fileMatch.LineMatches[i].Score { - maxFileScore = fileMatch.LineMatches[i].Score - } - - // Order by ordering in file. - fileMatch.LineMatches[i].Score += scoreLineOrderFactor * (1.0 - (float64(i) / float64(len(fileMatch.LineMatches)))) - } - - for i := range fileMatch.ChunkMatches { - if maxFileScore < fileMatch.ChunkMatches[i].Score { - maxFileScore = fileMatch.ChunkMatches[i].Score - } - - // Order by ordering in file. - fileMatch.ChunkMatches[i].Score += scoreLineOrderFactor * (1.0 - (float64(i) / float64(len(fileMatch.ChunkMatches)))) - } - - // Maintain ordering of input files. This - // strictly dominates the in-file ordering of - // the matches. - addScore("fragment", maxFileScore) - - // Add tiebreakers - // - // ScoreOffset shifts the score 7 digits to the left. - fileMatch.Score = math.Trunc(fileMatch.Score) * ScoreOffset - - md := d.repoMetaData[d.repos[doc]] - - // md.Rank lies in the range [0, 65535]. Hence, we have to allocate 5 digits for - // the rank. The scoreRepoRankFactor shifts the rank score 2 digits to the left, - // reserving digits 3-7 for the repo rank. - addScore("repo-rank", scoreRepoRankFactor*float64(md.Rank)) - - // digits 1-2 and the decimals are reserved for the doc order. Doc order - // (without the scaling factor) lies in the range [0, 1]. The upper bound is - // achieved for matches in the first document of a shard. 
- addScore("doc-order", scoreFileOrderFactor*(1.0-float64(doc)/float64(len(d.boundaries)))) - - if opts.DebugScore { - // To make the debug output easier to read, we split the score into the query - // dependent score and the tiebreaker - score := math.Trunc(fileMatch.Score / ScoreOffset) - tiebreaker := fileMatch.Score - score*ScoreOffset - fileMatch.Debug = fmt.Sprintf("score: %d (%.2f) <- %s", int(score), tiebreaker, strings.TrimSuffix(fileMatch.Debug, ", ")) - } -} - -// idf computes the inverse document frequency for a term. nq is the number of -// documents that contain the term and documentCount is the total number of -// documents in the corpus. -func idf(nq, documentCount int) float64 { - return math.Log(1.0 + ((float64(documentCount) - float64(nq) + 0.5) / (float64(nq) + 0.5))) -} - -// termDocumentFrequency is a map "term" -> "number of documents that contain the term" -type termDocumentFrequency map[string]int - -// termFrequency stores the term frequencies for doc. -type termFrequency struct { - doc uint32 - tf map[string]int -} - -// scoreFilesUsingBM25 computes the score according to BM25, the most common -// scoring algorithm for text search: https://en.wikipedia.org/wiki/Okapi_BM25. -// -// This scoring strategy ignores all other signals including document ranks. -// This keeps things simple for now, since BM25 is not normalized and can be -// tricky to combine with other scoring signals. -func (d *indexData) scoreFilesUsingBM25(fileMatches []FileMatch, tfs []termFrequency, df termDocumentFrequency, opts *SearchOptions) { - // Use standard parameter defaults (used in Lucene and academic papers) - k, b := 1.2, 0.75 - - averageFileLength := float64(d.boundaries[d.numDocs()]) / float64(d.numDocs()) - // This is very unlikely, but explicitly guard against division by zero. - if averageFileLength == 0 { - averageFileLength++ - } - - for i := range tfs { - score := 0.0 - - // Compute the file length ratio. 
Usually the calculation would be based on terms, but using - // bytes should work fine, as we're just computing a ratio. - doc := tfs[i].doc - fileLength := float64(d.boundaries[doc+1] - d.boundaries[doc]) - - L := fileLength / averageFileLength - - sumTF := 0 // Just for debugging - for term, f := range tfs[i].tf { - sumTF += f - tfScore := ((k + 1.0) * float64(f)) / (k*(1.0-b+b*L) + float64(f)) - score += idf(df[term], int(d.numDocs())) * tfScore - } - - fileMatches[i].Score = score - - if opts.DebugScore { - fileMatches[i].Debug = fmt.Sprintf("bm25-score: %.2f <- sum-termFrequencies: %d, length-ratio: %.2f", score, sumTF, L) - } - } -} diff --git a/shards/aggregate.go b/search/aggregate.go similarity index 93% rename from shards/aggregate.go rename to search/aggregate.go index a89d99593..7839e937e 100644 --- a/shards/aggregate.go +++ b/search/aggregate.go @@ -1,7 +1,8 @@ -package shards +package search import ( "context" + "maps" "sync" "time" @@ -9,6 +10,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" ) var metricFinalAggregateSize = promauto.NewHistogramVec(prometheus.HistogramOpts{ @@ -46,14 +48,10 @@ func (c *collectSender) Send(r *zoekt.SearchResult) { if len(r.Files) > 0 { c.aggregate.Files = append(c.aggregate.Files, r.Files...) - c.aggregate.Files = zoekt.SortAndTruncateFiles(c.aggregate.Files, c.opts) + c.aggregate.Files = index.SortAndTruncateFiles(c.aggregate.Files, c.opts) - for k, v := range r.RepoURLs { - c.aggregate.RepoURLs[k] = v - } - for k, v := range r.LineFragments { - c.aggregate.LineFragments[k] = v - } + maps.Copy(c.aggregate.RepoURLs, r.RepoURLs) + maps.Copy(c.aggregate.LineFragments, r.LineFragments) } // The priority of our aggregate is the largest priority we collect. 
@@ -150,7 +148,7 @@ func newFlushCollectSender(opts *zoekt.SearchOptions, sender zoekt.Sender) (zoek // limitSender wraps a sender and calls cancel once the truncator has finished // truncating. -func limitSender(cancel context.CancelFunc, sender zoekt.Sender, truncator zoekt.DisplayTruncator) zoekt.Sender { +func limitSender(cancel context.CancelFunc, sender zoekt.Sender, truncator index.DisplayTruncator) zoekt.Sender { return zoekt.SenderFunc(func(result *zoekt.SearchResult) { var hasMore bool result.Files, hasMore = truncator(result.Files) diff --git a/shards/eval.go b/search/eval.go similarity index 93% rename from shards/eval.go rename to search/eval.go index debe6d2a6..25ebe2c4c 100644 --- a/shards/eval.go +++ b/search/eval.go @@ -1,12 +1,12 @@ -package shards +package search import ( "context" "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/internal/tenant" + "github.com/sourcegraph/zoekt/internal/trace" "github.com/sourcegraph/zoekt/query" - "github.com/sourcegraph/zoekt/trace" ) // typeRepoSearcher evaluates all type:repo sub-queries before sending the query @@ -20,9 +20,7 @@ func (s *typeRepoSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.Se tr, ctx := trace.New(ctx, "typeRepoSearcher.Search", "") tr.LazyLog(q, true) tr.LazyPrintf("opts: %+v", opts) - if tenant.EnforceTenant() { - tenant.Log(ctx, tr) - } + tenant.Log(ctx, tr) defer func() { if sr != nil { tr.LazyPrintf("num files: %d", len(sr.Files)) @@ -47,9 +45,7 @@ func (s *typeRepoSearcher) StreamSearch(ctx context.Context, q query.Q, opts *zo tr, ctx := trace.New(ctx, "typeRepoSearcher.StreamSearch", "") tr.LazyLog(q, true) tr.LazyPrintf("opts: %+v", opts) - if tenant.EnforceTenant() { - tenant.Log(ctx, tr) - } + tenant.Log(ctx, tr) var stats zoekt.Stats defer func() { tr.LazyPrintf("stats: %+v", stats) @@ -75,9 +71,7 @@ func (s *typeRepoSearcher) List(ctx context.Context, q query.Q, opts *zoekt.List tr, ctx := trace.New(ctx, "typeRepoSearcher.List", "") tr.LazyLog(q, true) 
tr.LazyPrintf("opts: %s", opts) - if tenant.EnforceTenant() { - tenant.Log(ctx, tr) - } + tenant.Log(ctx, tr) defer func() { if rl != nil { tr.LazyPrintf("repos.size=%d reposmap.size=%d crashes=%d stats=%+v", len(rl.Repos), len(rl.ReposMap), rl.Crashes, rl.Stats) diff --git a/shards/eval_test.go b/search/eval_test.go similarity index 88% rename from shards/eval_test.go rename to search/eval_test.go index c53011294..6d312a669 100644 --- a/shards/eval_test.go +++ b/search/eval_test.go @@ -1,4 +1,4 @@ -package shards +package search import ( "context" @@ -6,23 +6,24 @@ import ( "testing" "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/query" ) func TestSearchTypeRepo(t *testing.T) { ss := newShardedSearcher(2) nextShardNum := 1 - addShard := func(docs ...zoekt.Document) { - b := testIndexBuilder(t, &zoekt.Repository{ID: 1, Name: "reponame"}, docs...) + addShard := func(docs ...index.Document) { + b := testShardBuilder(t, &zoekt.Repository{ID: 1, Name: "reponame"}, docs...) 
shard := searcherForTest(t, b) ss.replace(map[string]zoekt.Searcher{fmt.Sprintf("key-%d", nextShardNum): shard}) nextShardNum++ } addShard( - zoekt.Document{Name: "f1", Content: []byte("bla the needle")}, - zoekt.Document{Name: "f2", Content: []byte("another file another needle")}) + index.Document{Name: "f1", Content: []byte("bla the needle")}, + index.Document{Name: "f2", Content: []byte("another file another needle")}) addShard( - zoekt.Document{Name: "f3", Content: []byte("another shard")}) + index.Document{Name: "f3", Content: []byte("another shard")}) searcher := &typeRepoSearcher{ss} search := func(q query.Q, o ...zoekt.SearchOptions) *zoekt.SearchResult { @@ -96,7 +97,7 @@ func TestSearchTypeRepo(t *testing.T) { t.Fatalf("got %v, want 0 matches", len(res.Files)) } - // no match by path + // no index by path res = search(query.NewAnd( &query.Type{ Type: query.TypeRepo, diff --git a/shards/sched.go b/search/sched.go similarity index 99% rename from shards/sched.go rename to search/sched.go index c9fcd8c99..d11110268 100644 --- a/shards/sched.go +++ b/search/sched.go @@ -1,4 +1,4 @@ -package shards +package search import ( "context" @@ -340,7 +340,7 @@ func parseTuneables(v string) map[string]int { // - batch timedout // - released // -// We have separate gauges and counters for exclusive processes which match +// We have separate gauges and counters for exclusive processes which index // what we track for normal processes: // // - exclusive queued diff --git a/shards/sched_test.go b/search/sched_test.go similarity index 98% rename from shards/sched_test.go rename to search/sched_test.go index 3b5ac7bf8..0a24abca5 100644 --- a/shards/sched_test.go +++ b/search/sched_test.go @@ -1,4 +1,4 @@ -package shards +package search import ( "context" @@ -138,7 +138,7 @@ func TestMultiScheduler(t *testing.T) { }() // Fill up interactive queue - for i := 0; i < capacity; i++ { + for range capacity { addProc() } diff --git a/shards/shards.go b/search/shards.go similarity 
index 93% rename from shards/shards.go rename to search/shards.go index 4552096b2..751390cc5 100644 --- a/shards/shards.go +++ b/search/shards.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package shards +package search import ( "context" @@ -28,19 +28,24 @@ import ( "sync" "time" - "golang.org/x/sync/semaphore" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + sglog "github.com/sourcegraph/log" "go.uber.org/atomic" + "golang.org/x/sync/semaphore" "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/internal/tenant/systemtenant" + "github.com/sourcegraph/zoekt/internal/trace" "github.com/sourcegraph/zoekt/query" - "github.com/sourcegraph/zoekt/trace" ) var ( + shardRecoveryLogger = sync.OnceValue(func() sglog.Logger { + return sglog.Scoped("searchShards") + }) + metricShardsLoaded = promauto.NewGauge(prometheus.GaugeOpts{ Name: "zoekt_shards_loaded", Help: "The number of shards currently loaded", @@ -436,6 +441,22 @@ func doSelectRepoSet(shards []*rankedShard, and *query.And) ([]*rankedShard, que } return false }) + case *query.Meta: + // Meta queries filter repositories based on metadata fields. + // By checking this at the shard level, we can skip entire shards + // that don't contain any matching repositories, avoiding expensive + // I/O operations. + setSize = 0 // Unknown size, we'll filter based on metadata + hasRepos = hasReposForPredicate(func(repo *zoekt.Repository) bool { + if repo.Metadata == nil { + return false + } + v, ok := repo.Metadata[setQuery.Field] + if !ok { + return false + } + return setQuery.Value.MatchString(v) + }) default: continue } @@ -486,7 +507,7 @@ func doSelectRepoSet(shards []*rankedShard, and *query.And) ([]*rankedShard, que // shard indexData.simplify will simplify to (and true (content baz)) -> // (content baz). 
This work can be done now once, rather than per shard. switch c := c.(type) { - case *query.RepoSet, *query.RepoIDs, *query.Repo: + case *query.RepoSet, *query.RepoIDs, *query.Repo, *query.Meta: and.Children[i] = &query.Const{Value: true} return filtered, query.Simplify(and) @@ -623,7 +644,7 @@ func (ss *shardedSearcher) StreamSearch(ctx context.Context, q query.Q, opts *zo // For streaming, the wrapping has to happen in the inverted order. sender = copyFileSender(sender) - if truncator, hasLimits := zoekt.NewDisplayTruncator(opts); hasLimits { + if truncator, hasLimits := index.NewDisplayTruncator(opts); hasLimits { var cancel context.CancelFunc ctx, cancel = context.WithCancel(ctx) defer cancel() @@ -689,10 +710,7 @@ func streamSearch(ctx context.Context, proc *process, q query.Q, opts *zoekt.Sea // We set the number of workers to GOMAXPROCS, or the number of shards, // whichever is smaller. - workers := runtime.GOMAXPROCS(0) - if workers > len(shards) { - workers = len(shards) - } + workers := min(runtime.GOMAXPROCS(0), len(shards)) type result struct { priority float64 @@ -715,7 +733,7 @@ func streamSearch(ctx context.Context, proc *process, q query.Q, opts *zoekt.Sea // Note: Making "search" a buffered channel has the effect of limiting the number of parallel shard searches. // Since searching is mostly CPU bound, limiting parallel shard searches also reduces the peak working set. wg.Add(workers) - for i := 0; i < workers; i++ { + for range workers { go func() { defer wg.Done() for s := range search { @@ -806,19 +824,42 @@ search: // sendByRepository splits a zoekt.SearchResult by repository and calls // sender.Send for each batch. Ranking in Sourcegraph expects zoekt.SearchResult -// to contain results with the same zoekt.SearchResult.Priority only. +// to contain results with the same zoekt.SearchResult.priority only. // // We split by repository instead of by priority because it is easier to set // RepoURLs and LineFragments in zoekt.SearchResult. 
func sendByRepository(result *zoekt.SearchResult, opts *zoekt.SearchOptions, sender zoekt.Sender) { if len(result.RepoURLs) <= 1 || len(result.Files) == 0 { - zoekt.SortFiles(result.Files) + index.SortFiles(result.Files) sender.Send(result) return } send := func(repoName string, a, b int, stats zoekt.Stats) { - zoekt.SortFiles(result.Files[a:b]) + index.SortFiles(result.Files[a:b]) + + filteredRepoURLs := map[string]string{repoName: result.RepoURLs[repoName]} + filteredLineFragments := map[string]string{repoName: result.LineFragments[repoName]} + + // Filter RepoURLs and LineFragments to only those of repoName and its + // subRepositories if there are subRepositories + for _, file := range result.Files[a:b] { + name := file.SubRepositoryName + if name == "" { + continue + } + _, repoSet := filteredRepoURLs[name] + url, ok := result.RepoURLs[name] + if !repoSet && ok { + filteredRepoURLs[name] = url + } + _, fragSet := filteredLineFragments[name] + frag, ok := result.LineFragments[name] + if !fragSet && ok { + filteredLineFragments[name] = frag + } + } + sender.Send(&zoekt.SearchResult{ Stats: stats, Progress: zoekt.Progress{ @@ -826,8 +867,8 @@ func sendByRepository(result *zoekt.SearchResult, opts *zoekt.SearchOptions, sen MaxPendingPriority: result.MaxPendingPriority, }, Files: result.Files[a:b], - RepoURLs: map[string]string{repoName: result.RepoURLs[repoName]}, - LineFragments: map[string]string{repoName: result.LineFragments[repoName]}, + RepoURLs: filteredRepoURLs, + LineFragments: filteredLineFragments, }) } @@ -891,12 +932,29 @@ func copyFiles(sr *zoekt.SearchResult) { } } +func logShardCrash(operation string, s zoekt.Searcher, q query.Q, recovered any, stack []byte) { + fields := []sglog.Field{ + sglog.String("operation", operation), + sglog.String("shard", s.String()), + sglog.String("query", q.String()), + sglog.String("stacktrace", string(stack)), + } + + if err, ok := recovered.(error); ok { + fields = append(fields, sglog.Error(err)) + } else { + 
fields = append(fields, sglog.String("panic", fmt.Sprint(recovered))) + } + + shardRecoveryLogger().Error("crashed shard", fields...) +} + func searchOneShard(ctx context.Context, s zoekt.Searcher, q query.Q, opts *zoekt.SearchOptions) (sr *zoekt.SearchResult, err error) { metricSearchShardRunning.Inc() defer func() { metricSearchShardRunning.Dec() if e := recover(); e != nil { - log.Printf("[ERROR] crashed shard: %s: %#v, %s", s, e, debug.Stack()) + logShardCrash("search", s, q, e, debug.Stack()) if sr == nil { sr = &zoekt.SearchResult{} @@ -918,7 +976,7 @@ func listOneShard(ctx context.Context, s zoekt.Searcher, q query.Q, opts *zoekt. defer func() { metricListShardRunning.Dec() if r := recover(); r != nil { - log.Printf("[ERROR] crashed shard: %s: %s, %s", s.String(), r, debug.Stack()) + logShardCrash("list", s, q, r, debug.Stack()) sink <- shardListResult{ &zoekt.RepoList{Crashes: 1}, nil, } @@ -995,7 +1053,7 @@ func (ss *shardedSearcher) List(ctx context.Context, q query.Q, opts *zoekt.List } close(feeder) - for i := 0; i < runtime.GOMAXPROCS(0); i++ { + for range runtime.GOMAXPROCS(0) { go func() { for s := range feeder { listOneShard(ctx, s, q, opts, all) @@ -1203,11 +1261,11 @@ func loadShard(fn string) (zoekt.Searcher, error) { return nil, err } - iFile, err := zoekt.NewIndexFile(f) + iFile, err := index.NewIndexFile(f) if err != nil { return nil, err } - s, err := zoekt.NewSearcher(iFile) + s, err := index.NewSearcher(iFile) if err != nil { iFile.Close() return nil, fmt.Errorf("NewSearcher(%s): %v", fn, err) diff --git a/shards/shards_test.go b/search/shards_test.go similarity index 75% rename from shards/shards_test.go rename to search/shards_test.go index c6bfbb601..ed1b7a733 100644 --- a/shards/shards_test.go +++ b/search/shards_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package shards +package search import ( "bytes" @@ -36,6 +36,9 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/grafana/regexp" + sglog "github.com/sourcegraph/log" + + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/query" @@ -73,6 +76,10 @@ func TestCrashResilience(t *testing.T) { log.SetOutput(out) defer log.SetOutput(oldOut) + oldShardRecoveryLogger := shardRecoveryLogger + shardRecoveryLogger = sglog.NoOp + defer func() { shardRecoveryLogger = oldShardRecoveryLogger }() + ss := newShardedSearcher(2) ss.ranked.Store([]*rankedShard{{Searcher: &crashSearcher{}}}) @@ -158,7 +165,7 @@ func TestOrderByShard(t *testing.T) { ss := newShardedSearcher(1) n := 10 * runtime.GOMAXPROCS(0) - for i := 0; i < n; i++ { + for i := range n { ss.replace(map[string]zoekt.Searcher{ fmt.Sprintf("shard%d", i): &rankSearcher{rank: uint16(i)}, }) @@ -199,13 +206,13 @@ func TestShardedSearcher_Ranking(t *testing.T) { ss := newShardedSearcher(1) var nextShardNum int - addShard := func(repo string, priority float64, docs ...zoekt.Document) { + addShard := func(repo string, priority float64, docs ...index.Document) { r := &zoekt.Repository{ID: hash(repo), Name: repo} r.RawConfig = map[string]string{ "public": "1", "priority": strconv.FormatFloat(priority, 'f', 2, 64), } - b := testIndexBuilder(t, r, docs...) + b := testShardBuilder(t, r, docs...) 
shard := searcherForTest(t, b) ss.replace(map[string]zoekt.Searcher{ fmt.Sprintf("key-%d", nextShardNum): shard, @@ -213,10 +220,10 @@ func TestShardedSearcher_Ranking(t *testing.T) { nextShardNum++ } - addShard("weekend-project", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) - addShard("moderately-popular", 500, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) - addShard("weekend-project-2", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) - addShard("super-star", 5000, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")}) + addShard("weekend-project", 20, index.Document{Name: "f2", Content: []byte("foo bas")}) + addShard("moderately-popular", 500, index.Document{Name: "f3", Content: []byte("foo bar")}) + addShard("weekend-project-2", 20, index.Document{Name: "f2", Content: []byte("foo bas")}) + addShard("super-star", 5000, index.Document{Name: "f1", Content: []byte("foo bar bas")}) want := []string{ "super-star", @@ -241,13 +248,13 @@ func TestShardedSearcher_DocumentRanking(t *testing.T) { ss := newShardedSearcher(1) var nextShardNum int - addShard := func(repo string, rank uint16, docs ...zoekt.Document) { + addShard := func(repo string, rank uint16, docs ...index.Document) { r := &zoekt.Repository{ID: hash(repo), Name: repo} r.RawConfig = map[string]string{ "public": "1", } r.Rank = rank - b := testIndexBuilder(t, r, docs...) + b := testShardBuilder(t, r, docs...) 
shard := searcherForTest(t, b) ss.replace(map[string]zoekt.Searcher{ fmt.Sprintf("key-%d", nextShardNum): shard, @@ -255,11 +262,11 @@ func TestShardedSearcher_DocumentRanking(t *testing.T) { nextShardNum++ } - addShard("old-project", 1, zoekt.Document{Name: "f1", Content: []byte("foobar")}) - addShard("recent", 2, zoekt.Document{Name: "f2", Content: []byte("foobaz")}) - addShard("old-project-2", 1, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) - addShard("new", 3, zoekt.Document{Name: "f4", Content: []byte("foo baz")}, - zoekt.Document{Name: "f5", Content: []byte("fooooo")}) + addShard("old-project", 1, index.Document{Name: "f1", Content: []byte("foobar")}) + addShard("recent", 2, index.Document{Name: "f2", Content: []byte("foobaz")}) + addShard("old-project-2", 1, index.Document{Name: "f3", Content: []byte("foo bar")}) + addShard("new", 3, index.Document{Name: "f4", Content: []byte("foo baz")}, + index.Document{Name: "f5", Content: []byte("fooooo")}) // Run a stream search and gather the results var results []*zoekt.SearchResult @@ -286,7 +293,7 @@ func TestShardedSearcher_DocumentRanking(t *testing.T) { files := results[1].Files got := make([]string, len(files)) - for i := 0; i < len(files); i++ { + for i := range files { got[i] = files[i].FileName } @@ -305,7 +312,7 @@ func TestFilteringShardsByRepoSetOrBranchesReposOrRepoIDs(t *testing.T) { repoSetNames := []string{} repoIDs := []uint32{} n := 10 * runtime.GOMAXPROCS(0) - for i := 0; i < n; i++ { + for i := range n { shardName := fmt.Sprintf("shard%d", i) repoName := fmt.Sprintf("%s-repository%.3d", namePrefix[i%3], i) repoID := hash(repoName) @@ -385,6 +392,182 @@ func TestFilteringShardsByRepoSetOrBranchesReposOrRepoIDs(t *testing.T) { } } +func TestFilteringShardsByMeta(t *testing.T) { + ss := newShardedSearcher(1) + + // Create repos with different metadata values + // We'll create 30 repos total: + // - 10 with nickname="project-A" + // - 10 with nickname="project-B" + // - 10 with no metadata + 
n := 30 + projectARepos := []string{} + projectBRepos := []string{} + + // Common document that will be in all repos + doc := index.Document{ + Name: "common.go", + Content: []byte("needle haystack"), + } + + for i := range n { + shardName := fmt.Sprintf("shard%d", i) + repoName := fmt.Sprintf("repository%.3d", i) + + var metadata map[string]string + if i < 10 { + // First 10 repos have project-A + metadata = map[string]string{"nickname": "project-A", "visibility": "public"} + projectARepos = append(projectARepos, repoName) + } else if i < 20 { + // Next 10 repos have project-B + metadata = map[string]string{"nickname": "project-B", "visibility": "private"} + projectBRepos = append(projectBRepos, repoName) + } + // Last 10 repos have no metadata + + repo := &zoekt.Repository{ + ID: uint32(i + 1), + Name: repoName, + Metadata: metadata, + } + + ss.replace(map[string]zoekt.Searcher{ + shardName: searcherForTest(t, testShardBuilder(t, repo, doc)), + }) + } + + // Test 1: Search without Meta filter - should search all shards + res, err := ss.Search(context.Background(), &query.Substring{Pattern: "needle"}, &zoekt.SearchOptions{}) + if err != nil { + t.Fatalf("Search without filter: %v", err) + } + if len(res.Files) != n { + t.Fatalf("no meta filter: got %d results, want %d", len(res.Files), n) + } + + sub := &query.Substring{Pattern: "needle"} + + // Helper function to extract unique repo names from search results + getRepoNames := func(files []zoekt.FileMatch) []string { + repoSet := make(map[string]struct{}) + for _, f := range files { + repoSet[f.Repository] = struct{}{} + } + repos := make([]string, 0, len(repoSet)) + for repo := range repoSet { + repos = append(repos, repo) + } + sort.Strings(repos) + return repos + } + + // Test 2: Filter by nickname="project-A" - should only search 10 shards + metaQueryA := &query.Meta{ + Field: "nickname", + Value: regexp.MustCompile("^project-A$"), + } + res, err = ss.Search(context.Background(), query.NewAnd(metaQueryA, sub), 
&zoekt.SearchOptions{}) + if err != nil { + t.Fatalf("Search with Meta filter A: %v", err) + } + gotRepos := getRepoNames(res.Files) + wantRepos := append([]string{}, projectARepos...) + sort.Strings(wantRepos) + if !reflect.DeepEqual(gotRepos, wantRepos) { + t.Fatalf("Meta(nickname=project-A):\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos) + } + + // Test 3: Filter by nickname="project-B" - should only search 10 shards + metaQueryB := &query.Meta{ + Field: "nickname", + Value: regexp.MustCompile("^project-B$"), + } + res, err = ss.Search(context.Background(), query.NewAnd(metaQueryB, sub), &zoekt.SearchOptions{}) + if err != nil { + t.Fatalf("Search with Meta filter B: %v", err) + } + gotRepos = getRepoNames(res.Files) + wantRepos = append([]string{}, projectBRepos...) + sort.Strings(wantRepos) + if !reflect.DeepEqual(gotRepos, wantRepos) { + t.Fatalf("Meta(nickname=project-B):\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos) + } + + // Test 4: Filter by visibility="public" - should only search 10 shards (project-A repos) + metaQueryPublic := &query.Meta{ + Field: "visibility", + Value: regexp.MustCompile("^public$"), + } + res, err = ss.Search(context.Background(), query.NewAnd(metaQueryPublic, sub), &zoekt.SearchOptions{}) + if err != nil { + t.Fatalf("Search with Meta filter public: %v", err) + } + gotRepos = getRepoNames(res.Files) + wantRepos = append([]string{}, projectARepos...) 
+ sort.Strings(wantRepos) + if !reflect.DeepEqual(gotRepos, wantRepos) { + t.Fatalf("Meta(visibility=public):\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos) + } + + // Test 5: Filter by non-existent field - should return 0 results + metaQueryNonExistent := &query.Meta{ + Field: "nonexistent_field", + Value: regexp.MustCompile(".*"), + } + res, err = ss.Search(context.Background(), query.NewAnd(metaQueryNonExistent, sub), &zoekt.SearchOptions{}) + if err != nil { + t.Fatalf("Search with Meta filter non-existent: %v", err) + } + if len(res.Files) != 0 { + t.Fatalf("Meta(nonexistent_field): got %d results, want 0", len(res.Files)) + } + + // Test 6: Filter by regex pattern matching multiple values + metaQueryRegex := &query.Meta{ + Field: "nickname", + Value: regexp.MustCompile("project-.*"), // Matches both project-A and project-B + } + res, err = ss.Search(context.Background(), query.NewAnd(metaQueryRegex, sub), &zoekt.SearchOptions{}) + if err != nil { + t.Fatalf("Search with Meta regex filter: %v", err) + } + gotRepos = getRepoNames(res.Files) + wantRepos = append(append([]string{}, projectARepos...), projectBRepos...) + sort.Strings(wantRepos) + if !reflect.DeepEqual(gotRepos, wantRepos) { + t.Fatalf("Meta(nickname=project-.*):\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos) + } + + // Test 7: Test that Meta query alone (without content search) works + res, err = ss.Search(context.Background(), metaQueryA, &zoekt.SearchOptions{}) + if err != nil { + t.Fatalf("Search with Meta query alone: %v", err) + } + gotRepos = getRepoNames(res.Files) + wantRepos = append([]string{}, projectARepos...) 
+ sort.Strings(wantRepos) + if !reflect.DeepEqual(gotRepos, wantRepos) { + t.Fatalf("Meta query alone:\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos) + } + + // Test 8: Test with List operation (not just Search) + listRes, err := ss.List(context.Background(), metaQueryA, nil) + if err != nil { + t.Fatalf("List with Meta filter: %v", err) + } + gotListRepos := make([]string, len(listRes.Repos)) + for i, r := range listRes.Repos { + gotListRepos[i] = r.Repository.Name + } + sort.Strings(gotListRepos) + wantRepos = append([]string{}, projectARepos...) + sort.Strings(wantRepos) + if !reflect.DeepEqual(gotListRepos, wantRepos) { + t.Fatalf("List with Meta(nickname=project-A):\ngot repos: %v\nwant repos: %v", gotListRepos, wantRepos) + } +} + func hash(name string) uint32 { h := fnv.New32() h.Write([]byte(name)) @@ -409,7 +592,7 @@ func (s *memSeeker) Size() (uint32, error) { } func TestUnloadIndex(t *testing.T) { - b := testIndexBuilder(t, nil, zoekt.Document{ + b := testShardBuilder(t, nil, index.Document{ Name: "filename", Content: []byte("needle needle needle"), }) @@ -420,7 +603,7 @@ func TestUnloadIndex(t *testing.T) { } indexBytes := buf.Bytes() indexFile := &memSeeker{indexBytes} - searcher, err := zoekt.NewSearcher(indexFile) + searcher, err := index.NewSearcher(indexFile) if err != nil { t.Fatalf("NewSearcher: %v", err) } @@ -471,7 +654,7 @@ func TestShardedSearcher_List(t *testing.T) { }, } - doc := zoekt.Document{ + doc := index.Document{ Name: "foo.go", Content: []byte("bar\nbaz"), Branches: []string{"main", "dev"}, @@ -480,10 +663,10 @@ func TestShardedSearcher_List(t *testing.T) { // Test duplicate removal when ListOptions.Minimal is true and false ss := newShardedSearcher(4) ss.replace(map[string]zoekt.Searcher{ - "1": searcherForTest(t, testIndexBuilder(t, repos[0], doc)), - "2": searcherForTest(t, testIndexBuilder(t, repos[0])), - "3": searcherForTest(t, testIndexBuilder(t, repos[1], doc)), - "4": searcherForTest(t, testIndexBuilder(t, 
repos[1])), + "1": searcherForTest(t, testShardBuilder(t, repos[0], doc)), + "2": searcherForTest(t, testShardBuilder(t, repos[0])), + "3": searcherForTest(t, testShardBuilder(t, repos[1], doc)), + "4": searcherForTest(t, testShardBuilder(t, repos[1])), }) ss.markReady() @@ -608,10 +791,10 @@ func TestShardedSearcher_List(t *testing.T) { } } -func testIndexBuilder(t testing.TB, repo *zoekt.Repository, docs ...zoekt.Document) *zoekt.IndexBuilder { - b, err := zoekt.NewIndexBuilder(repo) +func testShardBuilder(t testing.TB, repo *zoekt.Repository, docs ...index.Document) *index.ShardBuilder { + b, err := index.NewShardBuilder(repo) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } for i, d := range docs { @@ -622,14 +805,14 @@ func testIndexBuilder(t testing.TB, repo *zoekt.Repository, docs ...zoekt.Docume return b } -func searcherForTest(t testing.TB, b *zoekt.IndexBuilder) zoekt.Searcher { +func searcherForTest(t testing.TB, b *index.ShardBuilder) zoekt.Searcher { var buf bytes.Buffer if err := b.Write(&buf); err != nil { t.Fatal(err) } f := &memSeeker{buf.Bytes()} - searcher, err := zoekt.NewSearcher(f) + searcher, err := index.NewSearcher(f) if err != nil { t.Fatalf("NewSearcher: %v", err) } @@ -638,7 +821,7 @@ func searcherForTest(t testing.TB, b *zoekt.IndexBuilder) zoekt.Searcher { } func reposForTest(n int) (result []*zoekt.Repository) { - for i := 0; i < n; i++ { + for i := range n { result = append(result, &zoekt.Repository{ ID: uint32(i + 1), Name: fmt.Sprintf("test-repository-%d", i), @@ -648,9 +831,9 @@ func reposForTest(n int) (result []*zoekt.Repository) { } func testSearcherForRepo(b testing.TB, r *zoekt.Repository, numFiles int) zoekt.Searcher { - builder := testIndexBuilder(b, r) + builder := testShardBuilder(b, r) - if err := builder.Add(zoekt.Document{ + if err := builder.Add(index.Document{ Name: fmt.Sprintf("%s/filename-%d.go", r.Name, 0), Content: []byte("needle needle needle haystack"), }); err != 
nil { @@ -658,7 +841,7 @@ func testSearcherForRepo(b testing.TB, r *zoekt.Repository, numFiles int) zoekt. } for i := 1; i < numFiles; i++ { - if err := builder.Add(zoekt.Document{ + if err := builder.Add(index.Document{ Name: fmt.Sprintf("%s/filename-%d.go", r.Name, i), Content: []byte("haystack haystack haystack"), }); err != nil { @@ -754,17 +937,17 @@ func TestRawQuerySearch(t *testing.T) { ss := newShardedSearcher(1) var nextShardNum int - addShard := func(repo string, rawConfig map[string]string, docs ...zoekt.Document) { + addShard := func(repo string, rawConfig map[string]string, docs ...index.Document) { r := &zoekt.Repository{Name: repo} r.RawConfig = rawConfig - b := testIndexBuilder(t, r, docs...) + b := testShardBuilder(t, r, docs...) shard := searcherForTest(t, b) ss.replace(map[string]zoekt.Searcher{fmt.Sprintf("key-%d", nextShardNum): shard}) nextShardNum++ } - addShard("public", map[string]string{"public": "1"}, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")}) - addShard("private_archived", map[string]string{"archived": "1"}, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) - addShard("public_fork", map[string]string{"public": "1", "fork": "1"}, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) + addShard("public", map[string]string{"public": "1"}, index.Document{Name: "f1", Content: []byte("foo bar bas")}) + addShard("private_archived", map[string]string{"archived": "1"}, index.Document{Name: "f2", Content: []byte("foo bas")}) + addShard("public_fork", map[string]string{"public": "1", "fork": "1"}, index.Document{Name: "f3", Content: []byte("foo bar")}) cases := []struct { pattern string @@ -969,7 +1152,7 @@ func mkSearchResult(n int, repoID uint32) *zoekt.SearchResult { return &zoekt.SearchResult{} } fm := make([]zoekt.FileMatch, 0, n) - for i := 0; i < n; i++ { + for range n { fm = append(fm, zoekt.FileMatch{Repository: fmt.Sprintf("repo%d", repoID), RepositoryID: repoID}) } @@ -979,7 +1162,7 @@ func mkSearchResult(n int, 
repoID uint32) *zoekt.SearchResult { func TestFileBasedSearch(t *testing.T) { cases := []struct { name string - testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch + testShardedSearch func(t *testing.T, q query.Q, ib *index.ShardBuilder, useDocumentRanks bool) []zoekt.FileMatch }{ {"Search", testShardedSearch}, {"StreamSearch", testShardedStreamSearch}, @@ -989,9 +1172,9 @@ func TestFileBasedSearch(t *testing.T) { // -----------0123456789012345678901234567890123456789 c2 := []byte("In Dutch, ananas means pineapple") // -----------0123456789012345678901234567890123456789 - b := testIndexBuilder(t, nil, - zoekt.Document{Name: "f1", Content: c1}, - zoekt.Document{Name: "f2", Content: c2}, + b := testShardBuilder(t, nil, + index.Document{Name: "f1", Content: c1}, + index.Document{Name: "f2", Content: c2}, ) for _, tt := range cases { @@ -1019,17 +1202,17 @@ func TestFileBasedSearch(t *testing.T) { func TestWordBoundaryRanking(t *testing.T) { cases := []struct { name string - testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch + testShardedSearch func(t *testing.T, q query.Q, ib *index.ShardBuilder, useDocumentRanks bool) []zoekt.FileMatch }{ {"Search", testShardedSearch}, {"StreamSearch", testShardedStreamSearch}, } - b := testIndexBuilder(t, nil, - zoekt.Document{Name: "f1", Content: []byte("xbytex xbytex")}, - zoekt.Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")}, + b := testShardBuilder(t, nil, + index.Document{Name: "f1", Content: []byte("xbytex xbytex")}, + index.Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")}, // -----------------------------------------0123456 789012 34567890 - zoekt.Document{Name: "f3", Content: []byte("xbytex ybytex")}) + index.Document{Name: "f3", Content: []byte("xbytex ybytex")}) for _, tt := range cases { for _, useDocumentRanks := range []bool{false, true} { @@ -1059,22 +1242,22 @@ 
func TestWordBoundaryRanking(t *testing.T) { func TestAtomCountScore(t *testing.T) { cases := []struct { name string - testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch + testShardedSearch func(t *testing.T, q query.Q, ib *index.ShardBuilder, useDocumentRanks bool) []zoekt.FileMatch }{ {"Search", testShardedSearch}, {"StreamSearch", testShardedStreamSearch}, } - b := testIndexBuilder(t, + b := testShardBuilder(t, &zoekt.Repository{ Branches: []zoekt.RepositoryBranch{ {Name: "branches", Version: "v1"}, {Name: "needle", Version: "v2"}, }, }, - zoekt.Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}}, - zoekt.Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}}, - zoekt.Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}}) + index.Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}}, + index.Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}}, + index.Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}}) for _, tt := range cases { for _, useDocumentRanks := range []bool{false, true} { @@ -1099,11 +1282,11 @@ func TestAtomCountScore(t *testing.T) { } func TestUseBM25Scoring(t *testing.T) { - b := testIndexBuilder(t, + b := testShardBuilder(t, &zoekt.Repository{}, - zoekt.Document{Name: "f1", Content: []byte("one two two three")}, - zoekt.Document{Name: "f2", Content: []byte("one two one two")}, - zoekt.Document{Name: "f3", Content: []byte("one three three three")}) + index.Document{Name: "f1", Content: []byte("one two two three")}, + index.Document{Name: "f2", Content: []byte("one two one two")}, + index.Document{Name: "f3", Content: []byte("one three three three")}) ss := newShardedSearcher(1) searcher := searcherForTest(t, b) @@ -1133,7 +1316,7 @@ 
func TestUseBM25Scoring(t *testing.T) { } } -func testShardedStreamSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { +func testShardedStreamSearch(t *testing.T, q query.Q, ib *index.ShardBuilder, useDocumentRanks bool) []zoekt.FileMatch { ss := newShardedSearcher(1) searcher := searcherForTest(t, ib) ss.replace(map[string]zoekt.Searcher{"r1": searcher}) @@ -1153,7 +1336,7 @@ func testShardedStreamSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, us return files } -func testShardedSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { +func testShardedSearch(t *testing.T, q query.Q, ib *index.ShardBuilder, useDocumentRanks bool) []zoekt.FileMatch { ss := newShardedSearcher(1) searcher := searcherForTest(t, ib) ss.replace(map[string]zoekt.Searcher{"r1": searcher}) diff --git a/shards/watcher.go b/search/watcher.go similarity index 97% rename from shards/watcher.go rename to search/watcher.go index f64163f0c..102418ee2 100644 --- a/shards/watcher.go +++ b/search/watcher.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package shards +package search import ( "fmt" @@ -26,7 +26,8 @@ import ( "time" "github.com/fsnotify/fsnotify" - "github.com/sourcegraph/zoekt" + + "github.com/sourcegraph/zoekt/index" ) type shardLoader interface { @@ -130,7 +131,7 @@ func (s *DirectoryWatcher) scan() error { // In the case of downgrades, avoid reading // newer index formats. 
- if version > zoekt.IndexFormatVersion && version > zoekt.NextIndexFormatVersion { + if version > index.IndexFormatVersion && version > index.NextIndexFormatVersion { continue } diff --git a/shards/watcher_test.go b/search/watcher_test.go similarity index 93% rename from shards/watcher_test.go rename to search/watcher_test.go index be91fb4ba..c235c65d0 100644 --- a/shards/watcher_test.go +++ b/search/watcher_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package shards +package search import ( "fmt" @@ -21,7 +21,7 @@ import ( "testing" "time" - "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" ) type loggingLoader struct { @@ -99,9 +99,14 @@ func TestDirWatcherUnloadOnce(t *testing.T) { dw.Stop() + // fsnotify can produce multiple events for a single write, which can lead to + // extra queued load notifications by the time we stop. Drain them and only + // assert we don't drop the same shard again. + for len(logger.loads) > 0 { + <-logger.loads + } + select { - case k := <-logger.loads: - t.Errorf("spurious load of %q", k) case k := <-logger.drops: t.Errorf("spurious drops of %q", k) default: @@ -182,7 +187,7 @@ func TestDirWatcherLoadLatest(t *testing.T) { // t.Fatalf("got %v, want 'empty'", err) // } - want := zoekt.NextIndexFormatVersion + want := index.NextIndexFormatVersion shardLatest := filepath.Join(dir, fmt.Sprintf("foo_v%d.00000.zoekt", want)) for delta := -1; delta <= 1; delta++ { diff --git a/shell.nix b/shell.nix index 9ca3d1dd4..ab3fc9936 100644 --- a/shell.nix +++ b/shell.nix @@ -1,4 +1,8 @@ -{ pkgs }: +{ pkgs ? import { + overlays = [ + (import ./ctag-overlay.nix) + ]; +}}: let # pkgs.universal-ctags installs the binary as "ctags", not "universal-ctags" # like zoekt expects. 
@@ -11,7 +15,7 @@ pkgs.mkShell { name = "zoekt"; nativeBuildInputs = [ - pkgs.go_1_22 + pkgs.go_1_23 # zoekt-git-index pkgs.git diff --git a/testdata/shards/repo17_v17.00000.zoekt b/testdata/shards/repo17_v17.00000.zoekt index 9446a10e3..d96ac0cad 100644 Binary files a/testdata/shards/repo17_v17.00000.zoekt and b/testdata/shards/repo17_v17.00000.zoekt differ diff --git a/testdata/shards/repo2_v16.00000.zoekt b/testdata/shards/repo2_v16.00000.zoekt index eb5aa4d6b..2803b76da 100644 Binary files a/testdata/shards/repo2_v16.00000.zoekt and b/testdata/shards/repo2_v16.00000.zoekt differ diff --git a/testdata/shards/repo_v16.00000.zoekt b/testdata/shards/repo_v16.00000.zoekt index ee0513349..0b370b91d 100644 Binary files a/testdata/shards/repo_v16.00000.zoekt and b/testdata/shards/repo_v16.00000.zoekt differ diff --git a/tombstones_unix.go b/tombstones_unix.go deleted file mode 100644 index f983b5096..000000000 --- a/tombstones_unix.go +++ /dev/null @@ -1,14 +0,0 @@ -//go:build !windows && !wasm - -package zoekt - -import ( - "os" - - "golang.org/x/sys/unix" -) - -func init() { - umask = os.FileMode(unix.Umask(0)) - unix.Umask(int(umask)) -} diff --git a/tombstones_windows.go b/tombstones_windows.go deleted file mode 100644 index 3b7d6ffa1..000000000 --- a/tombstones_windows.go +++ /dev/null @@ -1,6 +0,0 @@ -package zoekt - -func init() { - // no setting of file permissions on Windows - umask = 0 -} diff --git a/web/api.go b/web/api.go index 4f8c1468c..072e5e79c 100644 --- a/web/api.go +++ b/web/api.go @@ -54,7 +54,7 @@ type FileMatch struct { ResultID string Language string // If this was a duplicate result, this will contain the file - // of the first match. + // of the first index. 
DuplicateID string Branches []string diff --git a/web/doc.go b/web/doc.go new file mode 100644 index 000000000..dfb80185e --- /dev/null +++ b/web/doc.go @@ -0,0 +1,15 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package web contains the logic for spinning up a zoekt webserver. It's exposed separately +// from zoekt-webserver to allow for customizing the endpoints and format templates. +package web diff --git a/web/e2e_test.go b/web/e2e_test.go index 2b234837e..03fc967af 100644 --- a/web/e2e_test.go +++ b/web/e2e_test.go @@ -32,6 +32,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/sourcegraph/zoekt" + "github.com/sourcegraph/zoekt/index" "github.com/sourcegraph/zoekt/query" ) @@ -54,14 +55,14 @@ func (s *memSeeker) Name() string { return "memSeeker" } -func searcherForTest(t *testing.T, b *zoekt.IndexBuilder) zoekt.Streamer { +func searcherForTest(t *testing.T, b *index.ShardBuilder) zoekt.Streamer { var buf bytes.Buffer if err := b.Write(&buf); err != nil { t.Fatal(err) } f := &memSeeker{buf.Bytes()} - searcher, err := zoekt.NewSearcher(f) + searcher, err := index.NewSearcher(f) if err != nil { t.Fatalf("NewSearcher: %v", err) } @@ -83,7 +84,7 @@ func (a adapter) StreamSearch(ctx context.Context, q query.Q, opts *zoekt.Search } func TestBasic(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewShardBuilder(&zoekt.Repository{ Name: "name", URL: "repo-url", CommitURLTemplate: `{{ URLJoinPath 
"https://github.com/org/repo/commit/" .Version}}`, @@ -92,9 +93,9 @@ func TestBasic(t *testing.T) { Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}}, }) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ // use a name which requires correct escaping. https://github.com/sourcegraph/zoekt/issues/807 Name: "foo/bar+baz", Content: []byte("to carry water in the no later bla"), @@ -149,7 +150,7 @@ func TestBasic(t *testing.T) { } func TestPrint(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewShardBuilder(&zoekt.Repository{ Name: "name", URL: "repo-url", CommitURLTemplate: "{{.Version}}", @@ -158,9 +159,9 @@ func TestPrint(t *testing.T) { Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}}, }) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "f2", Content: []byte("to carry water in the no later bla"), Branches: []string{"master"}, @@ -168,7 +169,7 @@ func TestPrint(t *testing.T) { t.Fatalf("Add: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "dir/f2", Content: []byte("blabla"), Branches: []string{"master"}, @@ -202,15 +203,15 @@ func TestPrint(t *testing.T) { } func TestPrintDefault(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewShardBuilder(&zoekt.Repository{ Name: "name", URL: "repo-url", Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}}, }) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "f2", Content: []byte("to carry water in the no later bla"), Branches: []string{"master"}, @@ -272,15 +273,15 @@ type Expectation struct { } func 
TestFormatJson(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewShardBuilder(&zoekt.Repository{ Name: "name", URL: "repo-url", Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}}, }) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "f2", Content: []byte("to carry water in the no later bla"), Branches: []string{"master"}, @@ -327,36 +328,36 @@ func TestFormatJson(t *testing.T) { } func TestContextLines(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewShardBuilder(&zoekt.Repository{ Name: "name", URL: "repo-url", Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}}, }) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "f2", Content: []byte("one line\nsecond snippet\nthird thing\nfourth\nfifth block\nsixth example\nseventh"), Branches: []string{"master"}, }); err != nil { t.Fatalf("Add: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "f3", Content: []byte("\n\n\n\nto carry water in the no later bla\n\n\n\n"), Branches: []string{"master"}, }); err != nil { t.Fatalf("Add: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "f4", Content: []byte("un \n \n\ttrois\n \n\nsix\n "), Branches: []string{"master"}, }); err != nil { t.Fatalf("Add: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "f5", Content: []byte("\ngreen\npastures\n\nhere"), Branches: []string{"master"}, @@ -440,7 +441,7 @@ func TestContextLines(t *testing.T) { }, }, "/search?q=one&format=json&ctx=2": { - "match at start returns After but no Before", + "index at start returns After but no Before", FileMatch{ FileName: "f2", Repo: "name", @@ -461,7 
+462,7 @@ func TestContextLines(t *testing.T) { }, }, "/search?q=seventh&format=json&ctx=2": { - "match at end returns Before but no After", + "index at end returns Before but no After", FileMatch{ FileName: "f2", Repo: "name", @@ -482,7 +483,7 @@ func TestContextLines(t *testing.T) { }, }, "/search?q=seventh&format=json&ctx=10": { - "match with large context at end returns whole document", + "index with large context at end returns whole document", FileMatch{ FileName: "f2", Repo: "name", @@ -503,7 +504,7 @@ func TestContextLines(t *testing.T) { }, }, "/search?q=one&format=json&ctx=10": { - "match with large context at start returns whole document", + "index with large context at start returns whole document", FileMatch{ FileName: "f2", Repo: "name", @@ -651,15 +652,15 @@ func checkResultMatches(t *testing.T, ts *httptest.Server, req string, expected } func TestContextLinesMustBeValid(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewShardBuilder(&zoekt.Repository{ Name: "name", URL: "repo-url", Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}}, }) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "f2", Content: []byte("to carry water in the no later bla"), Branches: []string{"master"}, @@ -746,13 +747,13 @@ func TestCrash(t *testing.T) { } func TestHostCustomization(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewShardBuilder(&zoekt.Repository{ Name: "name", }) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "file", Content: []byte("bla"), }); err != nil { @@ -798,15 +799,15 @@ func TestHostCustomization(t *testing.T) { } func TestDupResult(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := 
index.NewShardBuilder(&zoekt.Repository{ Name: "name", }) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } - for i := 0; i < 2; i++ { - if err := b.Add(zoekt.Document{ + for i := range 2 { + if err := b.Add(index.Document{ Name: fmt.Sprintf("file%d", i), Content: []byte("bla"), }); err != nil { @@ -848,15 +849,15 @@ func TestDupResult(t *testing.T) { } func TestTruncateLine(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewShardBuilder(&zoekt.Repository{ Name: "name", }) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } largePadding := bytes.Repeat([]byte{'a'}, 100*1000) // 100kb - if err := b.Add(zoekt.Document{ + if err := b.Add(index.Document{ Name: "file", Content: append(append(largePadding, []byte("helloworld")...), largePadding...), }); err != nil { @@ -904,15 +905,15 @@ func TestTruncateLine(t *testing.T) { } func TestHealthz(t *testing.T) { - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ + b, err := index.NewShardBuilder(&zoekt.Repository{ Name: "name", }) if err != nil { - t.Fatalf("NewIndexBuilder: %v", err) + t.Fatalf("NewShardBuilder: %v", err) } - for i := 0; i < 2; i++ { - if err := b.Add(zoekt.Document{ + for i := range 2 { + if err := b.Add(index.Document{ Name: fmt.Sprintf("file%d", i), Content: []byte("bla"), }); err != nil { diff --git a/web/server.go b/web/server.go index 9e23af8b8..ee4bc5c34 100644 --- a/web/server.go +++ b/web/server.go @@ -33,9 +33,11 @@ import ( "github.com/grafana/regexp" + "github.com/sourcegraph/zoekt/index" + zjson "github.com/sourcegraph/zoekt/internal/json" + "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/internal/tenant/systemtenant" - zjson "github.com/sourcegraph/zoekt/json" "github.com/sourcegraph/zoekt/query" ) @@ -46,6 +48,9 @@ var Funcmap = template.FuncMap{ "More": func(orig int) int { return orig * 3 }, + "AddLineNumbers": func(content string, lineNum int, 
isBefore bool) []lineMatch { + return AddLineNumbers(content, lineNum, isBefore) + }, "HumanUnit": func(orig int64) string { b := orig suffix := "" @@ -79,6 +84,42 @@ var Funcmap = template.FuncMap{ }, } +// lineMatch represents a line of content with its associated line number +type lineMatch struct { + LineNum int + Content string +} + +// AddLineNumbers adds line numbers to the beginning of each line in the given content string. +// The line numbers are relative to the current line number (lineNum). +// For 'before' content, numbers will count backwards from lineNum-1. +// For 'after' content, numbers will count forwards from lineNum+1. +func AddLineNumbers(content string, lineNum int, isBefore bool) []lineMatch { + if content == "" { + return nil + } + + lines := strings.Split(content, "\n") + var result []lineMatch + + for i, line := range lines { + if i == len(lines)-1 && line == "" { + continue + } + + var num int + if isBefore { + num = lineNum - len(lines) + i + } else { + num = lineNum + i + 1 + } + + // Add the line number and content to the result + result = append(result, lineMatch{LineNum: num, Content: line}) + } + return result +} + const defaultNumResults = 50 type Server struct { @@ -153,7 +194,7 @@ func (s *Server) getTextTemplate(str string) *texttemplate.Template { return t } - t, err := zoekt.ParseTemplate(str) + t, err := index.ParseTemplate(str) if err != nil { log.Printf("text template parse error: %v", err) t = texttemplate.Must(texttemplate.New("empty").Parse("")) @@ -614,6 +655,13 @@ func (s *Server) servePrintErr(w http.ResponseWriter, r *http.Request) error { f := result.Files[0] + if qvals.Get("format") == "raw" { + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + w.Header().Set("X-Content-Type-Options", "nosniff") + _, _ = w.Write(f.Content) + return nil + } + byteLines := bytes.Split(f.Content, []byte{'\n'}) strLines := make([]string, 0, len(byteLines)) for _, l := range byteLines { diff --git a/web/server_test.go 
b/web/server_test.go new file mode 100644 index 000000000..16315c69f --- /dev/null +++ b/web/server_test.go @@ -0,0 +1,96 @@ +package web + +import ( + "testing" + + "github.com/google/go-cmp/cmp" +) + +func TestAddLineNumbers(t *testing.T) { + tests := []struct { + name string + content string + lineNum int + isBefore bool + want []lineMatch + }{ + { + name: "empty content", + content: "", + lineNum: 10, + isBefore: true, + want: nil, + }, + { + name: "single line before", + content: "hello world", + lineNum: 10, + isBefore: true, + want: []lineMatch{ + {LineNum: 9, Content: "hello world"}, + }, + }, + { + name: "single line after", + content: "hello world", + lineNum: 10, + isBefore: false, + want: []lineMatch{ + {LineNum: 11, Content: "hello world"}, + }, + }, + { + name: "multiple lines before", + content: "first line\nsecond line\nthird line", + lineNum: 10, + isBefore: true, + want: []lineMatch{ + {LineNum: 7, Content: "first line"}, + {LineNum: 8, Content: "second line"}, + {LineNum: 9, Content: "third line"}, + }, + }, + { + name: "multiple lines after", + content: "first line\nsecond line\nthird line", + lineNum: 10, + isBefore: false, + want: []lineMatch{ + {LineNum: 11, Content: "first line"}, + {LineNum: 12, Content: "second line"}, + {LineNum: 13, Content: "third line"}, + }, + }, + { + name: "content with empty lines before", + content: "first line\n\nthird line", + lineNum: 10, + isBefore: true, + want: []lineMatch{ + {LineNum: 7, Content: "first line"}, + {LineNum: 8, Content: ""}, + {LineNum: 9, Content: "third line"}, + }, + }, + { + name: "content with empty lines after", + content: "first line\n\nthird line", + lineNum: 10, + isBefore: false, + want: []lineMatch{ + {LineNum: 11, Content: "first line"}, + {LineNum: 12, Content: ""}, + {LineNum: 13, Content: "third line"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := AddLineNumbers(tt.content, tt.lineNum, tt.isBefore) + if diff := cmp.Diff(tt.want, 
got); diff != "" { + t.Errorf("AddLineNumbers() mismatch (-want +got):\n%s", diff) + } + }) + } +} diff --git a/web/templates.go b/web/templates.go index 2c5f4026a..e5aa2ced0 100644 --- a/web/templates.go +++ b/web/templates.go @@ -34,6 +34,7 @@ var TemplateText = map[string]string{