diff --git a/fuse.go b/fuse.go index 8a929b7..c38dcc8 100644 --- a/fuse.go +++ b/fuse.go @@ -529,8 +529,8 @@ func (n *cacheDownloadNode) lookupUnderModule(ctx context.Context, name string, dotExt := filepath.Ext(name) switch dotExt { - case ".info", ".mod", ".ziphash": - ext := dotExt[1:] // "info", "mod", "ziphash" + case ".info", ".mod", ".ziphash", ".zip": + ext := dotExt[1:] // "info", "mod", "ziphash", "zip" sp := n.fs.Stats.StartSpan("get-metafile-" + ext) defer func() { diff --git a/go.mod b/go.mod index edb35ab..718969e 100644 --- a/go.mod +++ b/go.mod @@ -39,6 +39,7 @@ require ( github.com/prometheus/common v0.65.0 // indirect github.com/prometheus/procfs v0.16.1 // indirect github.com/sergi/go-diff v1.4.0 // indirect + github.com/tomhjp/synthzip v0.1.0 // indirect golang.org/x/crypto v0.40.0 // indirect golang.org/x/exp v0.0.0-20250210185358-939b2ce775ac // indirect google.golang.org/protobuf v1.36.6 // indirect diff --git a/go.sum b/go.sum index a5fcc9c..4859760 100644 --- a/go.sum +++ b/go.sum @@ -90,6 +90,8 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/tomhjp/synthzip v0.1.0 h1:amKn0GMYLPtJdMgARvjY7nNkjHOc298pY0WdgNdOHwg= +github.com/tomhjp/synthzip v0.1.0/go.mod h1:xzyQ03xr90LexJYX91KIXNa193WoOFiBMwTMgTEMNj0= github.com/willscott/go-nfs v0.0.3 h1:Z5fHVxMsppgEucdkKBN26Vou19MtEM875NmRwj156RE= github.com/willscott/go-nfs v0.0.3/go.mod h1:VhNccO67Oug787VNXcyx9JDI3ZoSpqoKMT/lWMhUIDg= github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886 h1:DtrBtkgTJk2XGt4T7eKdKVkd9A5NCevN2e4inLXtsqA= diff --git a/gomodfs.go b/gomodfs.go index 4a412f0..d606400 100644 --- a/gomodfs.go +++ b/gomodfs.go @@ -34,6 +34,7 @@ import ( "github.com/tailscale/gomodfs/internal/lru" "github.com/tailscale/gomodfs/stats" "github.com/tailscale/gomodfs/store" + "github.com/tomhjp/synthzip" "golang.org/x/mod/module" "golang.org/x/mod/sumdb/dirhash" "golang.org/x/sync/singleflight" @@ -272,6 +273,7 @@ type putFile struct { func (pf putFile) Path() string { return pf.path } func (pf putFile) Size() int64 { return int64(pf.zf.UncompressedSize64) } +func (pf putFile) CRC32() uint32 { return pf.zf.CRC32 } func (pf putFile) Open() (io.ReadCloser, error) { return pf.zf.Open() } func (pf putFile) Mode() os.FileMode { return pf.zf.Mode() } @@ -396,7 +398,7 @@ func (fs *FS) getZipRoot(ctx context.Context, mv store.ModuleVersion) (mh store. return rooti.(store.ModHandle), nil } -// ext is one of "mod", "ziphash", "info". +// ext is one of "mod", "ziphash", "info", "zip". func (fs *FS) getMetaFileByExt(ctx context.Context, mv store.ModuleVersion, ext string) ([]byte, error) { switch ext { case "mod": @@ -405,6 +407,8 @@ func (fs *FS) getMetaFileByExt(ctx context.Context, mv store.ModuleVersion, ext return fs.getZiphash(ctx, mv) case "info": return fs.getInfoFile(ctx, mv) + case "zip": + return fs.getZipFileData(ctx, mv) } return nil, fmt.Errorf("unknown meta file extension %q", ext) } @@ -444,6 +448,64 @@ func (fs *FS) getZiphash(ctx context.Context, mv store.ModuleVersion) (data []by return fs.Store.GetZipHash(ctx, zr) } +// getZipArchive returns a synthetic zip archive for the given module version. +// The returned archive is cheap to construct (metadata only) and supports ReadAt +// for lazy content loading. +func (fs *FS) getZipArchive(ctx context.Context, mv store.ModuleVersion) (*synthzip.Archive, error) { + mh, err := fs.getZipRoot(ctx, mv) + if err != nil { + return nil, err + } + entries, err := fs.Store.GetZipFileEntries(ctx, mh) + if err != nil { + return nil, fmt.Errorf("GetZipFileEntries for %v: %w", mv, err) + } + + // Module zip files have paths like "module@version/path". + prefix := mv.Module + "@" + mv.Version + "/" + + files := make([]synthzip.File, len(entries)) + for i, e := range entries { + files[i] = synthzip.File{ + Name: prefix + e.Path, + Size: e.Size, + CRC32: e.CRC32, + } + } + + return synthzip.New(files, func(name string) (io.ReadCloser, error) { + // Strip the module@version/ prefix to get the store path. + p := strings.TrimPrefix(name, prefix) + data, err := fs.Store.GetFile(ctx, mh, p) + if err != nil { + return nil, err + } + return io.NopCloser(bytes.NewReader(data)), nil + }) +} + +// getZipFileData returns the full zip file bytes for the given module version. +func (fs *FS) getZipFileData(ctx context.Context, mv store.ModuleVersion) ([]byte, error) { + archive, err := fs.getZipArchive(ctx, mv) + if err != nil { + return nil, err + } + data := make([]byte, archive.Size()) + if _, err := archive.ReadAt(data, 0); err != nil && err != io.EOF { + return nil, fmt.Errorf("reading zip archive for %v: %w", mv, err) + } + return data, nil +} + +// getZipFileSize returns the size of the synthetic zip file without reading content. +func (fs *FS) getZipFileSize(ctx context.Context, mv store.ModuleVersion) (int64, error) { + archive, err := fs.getZipArchive(ctx, mv) + if err != nil { + return 0, err + } + return archive.Size(), nil +} + func (fs *FS) walkStoreModulePaths(ctx context.Context, mh store.ModHandle) iter.Seq[result.Of[string]] { return func(yield func(result.Of[string]) bool) { var doDir func(string) bool // recursive dir walker, called with "" (root) or "path/to/file-or-dir" @@ -489,6 +551,7 @@ var ( cdFileInfo = mkWellKnownPathHash("info") cdFileMod = mkWellKnownPathHash("mod") cdFileZiphash = mkWellKnownPathHash("ziphash") + cdFileZip = mkWellKnownPathHash("zip") pathHashTSGo = mkWellKnownPathHash(wkTSGoExtracted) ) diff --git a/nfs.go b/nfs.go index 093d8f2..1b618cd 100644 --- a/nfs.go +++ b/nfs.go @@ -233,6 +233,15 @@ func (b billyFS) Lstat(filename string) (os.FileInfo, error) { ctx := context.TODO() if ext := mp.CacheDownloadFileExt; ext != "" { + if ext == "zip" { + // Avoid materializing the entire zip just for stat. + size, err := b.fs.getZipFileSize(ctx, mp.ModVersion) + if err != nil { + b.fs.logf("Failed to get zip size for %v: %v", mp.ModVersion, err) + return nil, syscall.EIO + } + return regFileInfo{name: filepath.Base(filename), size: size}, nil + } v, err := b.fs.getMetaFileByExt(ctx, mp.ModVersion, ext) if err != nil { b.fs.logf("Failed to get %s file for %v: %v", ext, mp.ModVersion, err) @@ -437,6 +446,8 @@ func (h *NFSHandler) fromHandle(handle handle) (ret handleTarget, err error) { return mkTargetFromPath(cdPath(mv, "mod")), nil case cdFileZiphash: return mkTargetFromPath(cdPath(mv, "ziphash")), nil + case cdFileZip: + return mkTargetFromPath(cdPath(mv, "zip")), nil case pathHashTSGo: if trip, ok := isTSGoModule(mv); ok { return mkTargetFromPath(tsGoZipRoot(trip) + ".extracted"), nil diff --git a/nfs_test.go b/nfs_test.go index c3b112c..385bcfa 100644 --- a/nfs_test.go +++ b/nfs_test.go @@ -5,6 +5,7 @@ package gomodfs import ( "archive/zip" + "bytes" "context" "fmt" "io" @@ -252,6 +253,7 @@ func TestNFSHandles(t *testing.T) { "cache/download/go4.org/mem/@v/v0.0.0-20240501181205-ae6ca9944745.info", "cache/download/go4.org/mem/@v/v0.0.0-20240501181205-ae6ca9944745.mod", "cache/download/go4.org/mem/@v/v0.0.0-20240501181205-ae6ca9944745.ziphash", + "cache/download/go4.org/mem/@v/v0.0.0-20240501181205-ae6ca9944745.zip", "tsgo-linux-amd64/1cd3bf1a6eaf559aa8c00e749289559c884cef09.extracted", "tsgo-linux-amd64/1cd3bf1a6eaf559aa8c00e749289559c884cef09/fake.bash", "tsgo-linux-amd64/1cd3bf1a6eaf559aa8c00e749289559c884cef09/bin/gofmt", @@ -302,6 +304,78 @@ func TestNFSHandles(t *testing.T) { } } +func TestZipFile(t *testing.T) { + gitCacheDir := testGitDir(t) + h := testNFSHandler(t, gitCacheDir) + + // Trigger download of the go4.org/mem module. + wantRegSize(t, h, "go4.org/mem@v0.0.0-20240501181205-ae6ca9944745/LICENSE", 11358) + + // Read the synthetic .zip file. + zipPath := "cache/download/go4.org/mem/@v/v0.0.0-20240501181205-ae6ca9944745.zip" + ctx := context.Background() + data, _, err := h.getFileContentsUncached(ctx, zipPath) + if err != nil { + t.Fatalf("getFileContentsUncached(%q): %v", zipPath, err) + } + if len(data) == 0 { + t.Fatal("zip file is empty") + } + + // Verify it's a valid zip archive. + zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + t.Fatalf("zip.NewReader: %v", err) + } + + // Verify files are present and match individually-cached content. + const mod = "go4.org/mem" + const ver = "v0.0.0-20240501181205-ae6ca9944745" + prefix := mod + "@" + ver + "/" + + for _, zf := range zr.File { + if !strings.HasPrefix(zf.Name, prefix) { + t.Errorf("zip entry %q doesn't start with expected prefix %q", zf.Name, prefix) + continue + } + relPath := strings.TrimPrefix(zf.Name, prefix) + + // Read from zip. + rc, err := zf.Open() + if err != nil { + t.Errorf("open zip entry %q: %v", zf.Name, err) + continue + } + zipContent, err := io.ReadAll(rc) + rc.Close() + if err != nil { + t.Errorf("read zip entry %q: %v", zf.Name, err) + continue + } + + // Read from store. + storePath := mod + "@" + ver + "/" + relPath + storeData, _, err := h.getFileContentsUncached(ctx, storePath) + if err != nil { + t.Errorf("getFileContentsUncached(%q): %v", storePath, err) + continue + } + + if !bytes.Equal(zipContent, storeData) { + t.Errorf("content mismatch for %q: zip has %d bytes, store has %d bytes", relPath, len(zipContent), len(storeData)) + } + } + + // Verify stat returns the correct size. + fi, err := h.billyFS().Lstat(zipPath) + if err != nil { + t.Fatalf("Lstat(%q): %v", zipPath, err) + } + if fi.Size() != int64(len(data)) { + t.Errorf("Lstat size = %d; want %d", fi.Size(), len(data)) + } +} + var ( handleOnce sync.Once handlePath string diff --git a/path.go b/path.go index 0a8c923..26f35ba 100644 --- a/path.go +++ b/path.go @@ -108,7 +108,7 @@ func parsePath(name string) (ret gmPath) { return } switch ext { - case ".mod", ".ziphash", ".info": + case ".mod", ".ziphash", ".info", ".zip": ret.CacheDownloadFileExt = ext[1:] default: // Not a recognized cache/download file. diff --git a/path_test.go b/path_test.go index 54d53ee..111251b 100644 --- a/path_test.go +++ b/path_test.go @@ -36,6 +36,13 @@ var pathTests = []struct { ModVersion: store.ModuleVersion{Module: "go4.org/mem", Version: "v0.0.0-20240501181205-ae6ca9944745"}, }, }, + { + path: "cache/download/go4.org/mem/@v/v0.0.0-20240501181205-ae6ca9944745.zip", + want: gmPath{ + CacheDownloadFileExt: "zip", + ModVersion: store.ModuleVersion{Module: "go4.org/mem", Version: "v0.0.0-20240501181205-ae6ca9944745"}, + }, + }, { path: "cache/download/go4.org/mem/@v/v0.0.0-20240501181205-ae6ca9944745.bogusext", want: gmPath{NotExist: true}, diff --git a/store/gitstore/gitstore.go b/store/gitstore/gitstore.go index cfbaf8f..67ad2ca 100644 --- a/store/gitstore/gitstore.go +++ b/store/gitstore/gitstore.go @@ -677,9 +677,10 @@ type modHandle struct { // blobMeta is the metadata for a blob in a tree. type blobMeta struct { - Blob objRef - Size int64 - Mode os.FileMode + Blob objRef + Size int64 + Mode os.FileMode + CRC32 uint32 // CRC-32 checksum; 0 if not known (old cache entries) } func (s *Storage) GetFile(ctx context.Context, h store.ModHandle, path string) ([]byte, error) { @@ -955,9 +956,50 @@ func (s *Storage) newModHandle(mv store.ModuleVersion, modTree objRef) (store.Mo } mh.dirEnts[dir] = append(mh.dirEnts[dir], ent) } + + // Try to load CRC32 data from the zipcrc blob. + if crcObj, err := s.getObject(context.TODO(), modTree.String()+":zipcrc", "blob"); err == nil { + for line := range bytes.Lines(crcObj.content) { + line = bytes.TrimRight(line, "\n") + if len(line) == 0 { + continue + } + hexCRC, filePath, ok := bytes.Cut(line, []byte("\t")) + if !ok || len(hexCRC) != 8 { + continue + } + crc, err := strconv.ParseUint(string(hexCRC), 16, 32) + if err != nil { + continue + } + p := string(filePath) + if bm, ok := mh.blobMeta[p]; ok { + bm.CRC32 = uint32(crc) + mh.blobMeta[p] = bm + } + } + } + // If zipcrc doesn't exist (old cache entries), CRC32 fields stay 0. + return mh, nil } +func (s *Storage) GetZipFileEntries(_ context.Context, h store.ModHandle) ([]store.ZipFileEntry, error) { + mh := h.(*modHandle) + entries := make([]store.ZipFileEntry, 0, len(mh.blobMeta)) + for p, bm := range mh.blobMeta { + entries = append(entries, store.ZipFileEntry{ + Path: p, + Size: bm.Size, + CRC32: bm.CRC32, + }) + } + slices.SortFunc(entries, func(a, b store.ZipFileEntry) int { + return cmp.Compare(a.Path, b.Path) + }) + return entries, nil +} + func (s *Storage) GetZipHash(ctx context.Context, h store.ModHandle) ([]byte, error) { tree := h.(*modHandle).modTree @@ -995,6 +1037,26 @@ func (s *Storage) PutModule(ctx context.Context, mv store.ModuleVersion, data st return nil, fmt.Errorf("failed to add %s file: %w", ext, err) } } + // Build the zipcrc blob: one line per file, sorted by path. + // Format: "<8-hex-crc32>\t\n" + var hasCRC bool + for _, f := range data.Files { + if f.CRC32() != 0 { + hasCRC = true + break + } + } + if hasCRC { + var crcBuf bytes.Buffer + for _, f := range data.Files { + fmt.Fprintf(&crcBuf, "%08x\t%s\n", f.CRC32(), f.Path()) + } + crcData := crcBuf.Bytes() + if err := tb.addFile("zipcrc", static(crcData), 0644); err != nil { + return nil, fmt.Errorf("failed to add zipcrc file: %w", err) + } + } + for _, f := range data.Files { if err := tb.addFile("zip/"+f.Path(), f.Open, f.Mode()); err != nil { return nil, fmt.Errorf("failed to add zip file %q: %w", f.Path(), err) diff --git a/store/store.go b/store/store.go index 82f4390..6071f31 100644 --- a/store/store.go +++ b/store/store.go @@ -49,6 +49,10 @@ type Store interface { // path is "" for root, else "foo" or "foo/bar" (no trailing slash) Readdir(ctx context.Context, h ModHandle, path string) ([]Dirent, error) + // GetZipFileEntries returns metadata for all files in the module's zip, + // sufficient to construct a synthetic zip archive without reading file contents. + GetZipFileEntries(ctx context.Context, h ModHandle) ([]ZipFileEntry, error) + // PutModule populates the store with the given module version data // when ErrCacheMiss is returned. PutModule(context.Context, ModuleVersion, PutModuleData) (ModHandle, error) @@ -71,6 +75,7 @@ type PutFile interface { Path() string Mode() fs.FileMode // only 0644 or 0755 are valid Size() int64 // size of the file in bytes + CRC32() uint32 // CRC-32 checksum (IEEE polynomial) of the file content Open() (io.ReadCloser, error) } @@ -80,6 +85,13 @@ type Dirent struct { Size int64 // for regular files } +// ZipFileEntry describes a file within a module's zip for synthetic zip construction. +type ZipFileEntry struct { + Path string // path within zip, relative to module root (e.g. "go.mod") + Size int64 + CRC32 uint32 +} + type ModuleVersion struct { Module string // unescaped, in original case (e.g. "github.com/Foo/bar") Version string // unescaped, in original case (e.g. "v1.2.3") diff --git a/tsgo.go b/tsgo.go index f6cd88b..bfbeea0 100644 --- a/tsgo.go +++ b/tsgo.go @@ -56,6 +56,7 @@ type tsgoFile struct { func (f tsgoFile) Path() string { return f.path } func (f tsgoFile) Size() int64 { return int64(len(f.content)) } +func (f tsgoFile) CRC32() uint32 { return 0 } // not used for tsgo func (f tsgoFile) Open() (io.ReadCloser, error) { return io.NopCloser(bytes.NewReader(f.content)), nil } func (f tsgoFile) Mode() os.FileMode { return f.mode } diff --git a/webdav.go b/webdav.go index 51a9aaa..0f8a748 100644 --- a/webdav.go +++ b/webdav.go @@ -97,6 +97,16 @@ func (d webdavFS) Stat(ctx context.Context, name string) (fi os.FileInfo, retErr return regFileInfo{name: name, size: 123}, nil } if ext := dp.CacheDownloadFileExt; ext != "" { + if ext == "zip" { + sp := d.fs.Stats.StartSpan("webdav.Stat-et-zip") + size, err := d.fs.getZipFileSize(ctx, dp.ModVersion) + sp.End(err) + if err != nil { + log.Printf("Failed to get zip size for %v: %v", dp.ModVersion, err) + return nil, syscall.EIO + } + return regFileInfo{name: name, size: size}, nil + } sp := d.fs.Stats.StartSpan("webdav.Stat-et-" + ext) v, err := d.fs.getMetaFileByExt(ctx, dp.ModVersion, ext) sp.End(err) diff --git a/winfsp.go b/winfsp.go index fc35918..c2aebad 100644 --- a/winfsp.go +++ b/winfsp.go @@ -172,6 +172,16 @@ func (pfs *fspFS) Stat(name string) (fi os.FileInfo, retErr error) { return regFileInfo{name: name, size: int64(len(tsgoExtractedFileContents(trip.Hash)))}, nil } if ext := dp.CacheDownloadFileExt; ext != "" { + if ext == "zip" { + sp := d.fs.Stats.StartSpan("fsp.Stat-et-zip") + size, err := d.fs.getZipFileSize(ctx, dp.ModVersion) + sp.End(err) + if err != nil { + log.Printf("Failed to get zip size for %v: %v", dp.ModVersion, err) + return nil, syscall.EIO + } + return regFileInfo{name: name, size: size}, nil + } sp := d.fs.Stats.StartSpan("fsp.Stat-et-" + ext) v, err := d.fs.getMetaFileByExt(ctx, dp.ModVersion, ext) sp.End(err)