diff --git a/go.mod b/go.mod index 5db8855..e200967 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/goforj/wire -go 1.19 +go 1.22 require ( github.com/fsnotify/fsnotify v1.7.0 @@ -11,7 +11,9 @@ require ( ) require ( + github.com/klauspost/cpuid/v2 v2.2.10 // indirect + github.com/zeebo/xxh3 v1.1.0 // indirect golang.org/x/mod v0.20.0 // indirect golang.org/x/sync v0.8.0 // indirect - golang.org/x/sys v0.23.0 // indirect + golang.org/x/sys v0.30.0 // indirect ) diff --git a/go.sum b/go.sum index 6fba262..6c326e7 100644 --- a/go.sum +++ b/go.sum @@ -4,13 +4,19 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/subcommands v1.2.0 h1:vWQspBTo2nEqTUFita5/KeEWlUL8kQObDFbub/EN9oE= github.com/google/subcommands v1.2.0/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk= +github.com/klauspost/cpuid/v2 v2.2.10 h1:tBs3QSyvjDyFTq3uoc/9xFpCuOsJQFNPiAhYdw2skhE= +github.com/klauspost/cpuid/v2 v2.2.10/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/zeebo/xxh3 v1.1.0 h1:s7DLGDK45Dyfg7++yxI0khrfwq9661w9EN78eP/UZVs= +github.com/zeebo/xxh3 v1.1.0/go.mod h1:IisAie1LELR4xhVinxWS5+zf1lA4p0MW4T+w+W07F5s= golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= +golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/tools v0.24.1 h1:vxuHLTNS3Np5zrYoPRpcheASHX/7KiGo+8Y4ZM1J2O8= golang.org/x/tools v0.24.1/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= diff --git a/internal/loader/artifact_cache.go b/internal/loader/artifact_cache.go index a6dfdb1..560c596 100644 --- a/internal/loader/artifact_cache.go +++ b/internal/loader/artifact_cache.go @@ -16,15 +16,15 @@ package loader import ( "bytes" - "crypto/sha256" - "encoding/hex" "go/token" "go/types" "io" "os" "path/filepath" "runtime" + "strconv" + "github.com/zeebo/xxh3" "golang.org/x/tools/go/gcexportdata" "github.com/goforj/wire/internal/cachepaths" @@ -56,8 +56,8 @@ func loaderArtifactPath(env []string, meta *packageMeta, isLocal bool) (string, } func loaderArtifactKey(meta *packageMeta, isLocal bool) (string, error) { - sum := sha256.New() - sum.Write([]byte("wire-loader-artifact-v4\n")) + sum := xxh3.New() + sum.Write([]byte("wire-loader-artifact-v5\n")) sum.Write([]byte(runtime.Version())) sum.Write([]byte{'\n'}) sum.Write([]byte(meta.ImportPath)) @@ -83,22 +83,21 @@ func loaderArtifactKey(meta *packageMeta, isLocal bool) (string, error) { sum.Write([]byte(meta.Error.Err)) sum.Write([]byte{'\n'}) } - return hex.EncodeToString(sum.Sum(nil)), nil + return strconv.FormatUint(sum.Sum64(), 16), nil } if err := hashMetaFiles(sum, metaFiles(meta)); err != nil { return "", err } - return hex.EncodeToString(sum.Sum(nil)), nil + return strconv.FormatUint(sum.Sum64(), 16), nil } -// hashFileContent returns the hex-encoded SHA-256 of the file content. +// hashFileContent returns the hex-encoded xxh3 hash of the file content. func hashFileContent(path string) (string, error) { data, err := os.ReadFile(path) if err != nil { return "", err } - h := sha256.Sum256(data) - return hex.EncodeToString(h[:]), nil + return strconv.FormatUint(xxh3.Hash(data), 16), nil } // hashMetaFiles writes content-based hashes for each file into sum. diff --git a/internal/loader/discovery_cache.go b/internal/loader/discovery_cache.go index 3381041..502bb88 100644 --- a/internal/loader/discovery_cache.go +++ b/internal/loader/discovery_cache.go @@ -2,15 +2,16 @@ package loader import ( "bytes" - "crypto/sha256" "encoding/gob" - "encoding/hex" "go/parser" "go/token" "os" "path/filepath" "runtime" "sort" + "strconv" + + "github.com/zeebo/xxh3" "github.com/goforj/wire/internal/cachepaths" ) @@ -254,7 +255,7 @@ func fingerprintDiscoveryFile(path string) (discoveryFileFingerprint, bool) { if err != nil { return discoveryFileFingerprint{}, false } - sum := sha256.New() + sum := xxh3.New() sum.Write([]byte(filepath.Base(path))) sum.Write([]byte{0}) file, err := parser.ParseFile(token.NewFileSet(), path, src, parser.ImportsOnly|parser.ParseComments) @@ -262,7 +263,7 @@ func fingerprintDiscoveryFile(path string) (discoveryFileFingerprint, bool) { sum.Write(src) return discoveryFileFingerprint{ Path: canonicalLoaderPath(path), - Hash: hex.EncodeToString(sum.Sum(nil)), + Hash: strconv.FormatUint(sum.Sum64(), 16), }, true } if offset := int(file.Package) - 1; offset > 0 && offset <= len(src) { @@ -280,7 +281,7 @@ func fingerprintDiscoveryFile(path string) (discoveryFileFingerprint, bool) { } return discoveryFileFingerprint{ Path: canonicalLoaderPath(path), - Hash: hex.EncodeToString(sum.Sum(nil)), + Hash: strconv.FormatUint(sum.Sum64(), 16), }, true } @@ -297,6 +298,5 @@ func hashGob(v interface{}) (string, error) { if err := gob.NewEncoder(&buf).Encode(v); err != nil { return "", err } - sum := sha256.Sum256(buf.Bytes()) - return hex.EncodeToString(sum[:]), nil + return strconv.FormatUint(xxh3.Hash(buf.Bytes()), 16), nil } diff --git a/internal/wire/output_cache.go b/internal/wire/output_cache.go index 42fcaa4..c7cd746 100644 --- a/internal/wire/output_cache.go +++ b/internal/wire/output_cache.go @@ -2,16 +2,17 @@ package wire import ( "context" - "crypto/sha256" "encoding/gob" - "encoding/hex" "os" "path/filepath" "runtime" "sort" "strconv" "strings" + "sync" + "github.com/zeebo/xxh3" + "golang.org/x/sync/errgroup" "golang.org/x/tools/go/packages" "github.com/goforj/wire/internal/cachepaths" @@ -56,39 +57,73 @@ func prepareGenerateOutputCache(ctx context.Context, wd string, env []string, pa } candidates := make(map[string]outputCacheCandidate, len(rootResult.Packages)) results := make([]GenerateResult, 0, len(rootResult.Packages)) - for _, pkg := range rootResult.Packages { - outDir, err := detectOutputDir(pkg.GoFiles) - if err != nil { - debugf(ctx, "generate.output_cache=bad_output_dir") - return candidates, nil, rootResult.Discovery, false - } - key, err := outputCacheKey(wd, opts, pkg) - if err != nil { - debugf(ctx, "generate.output_cache=key_error") - return candidates, nil, rootResult.Discovery, false - } - path, err := outputCachePath(env, key) - if err != nil { - debugf(ctx, "generate.output_cache=path_error") - return candidates, nil, rootResult.Discovery, false - } - candidates[pkg.PkgPath] = outputCacheCandidate{ - path: path, - outputPath: filepath.Join(outDir, opts.PrefixOutputFile+"wire_gen.go"), - } - entry, ok := readOutputCache(path) - if !ok { - debugf(ctx, "generate.output_cache=miss") - return candidates, nil, rootResult.Discovery, false - } - results = append(results, GenerateResult{ - PkgPath: pkg.PkgPath, - OutputPath: filepath.Join(outDir, opts.PrefixOutputFile+"wire_gen.go"), - Content: entry.Content, + fhc := newFileHashCache() + + // prefetch: collect all reachable files across all targets, hash in parallel + workspace := detectWireModuleRoot(wd) + allFiles := collectReachableFiles(workspace, rootResult.Packages) + if err := fhc.prefetch(allFiles); err != nil { + debugf(ctx, "generate.output_cache=prefetch_error err=%v", err) + return nil, nil, rootResult.Discovery, false + } + + // compute keys and lookup cache entries in parallel + type cacheEntry struct { + candidate outputCacheCandidate + result *GenerateResult // nil if miss + } + entries := make([]cacheEntry, len(rootResult.Packages)) + + g, _ := errgroup.WithContext(ctx) + for i, pkg := range rootResult.Packages { + g.Go(func() error { + outDir, err := detectOutputDir(pkg.GoFiles) + if err != nil { + return err + } + key, err := outputCacheKey(wd, opts, pkg, fhc) + if err != nil { + return err + } + path, err := outputCachePath(env, key) + if err != nil { + return err + } + entries[i].candidate = outputCacheCandidate{ + path: path, + outputPath: filepath.Join(outDir, opts.PrefixOutputFile+"wire_gen.go"), + } + if entry, ok := readOutputCache(path); ok { + entries[i].result = &GenerateResult{ + PkgPath: pkg.PkgPath, + OutputPath: entries[i].candidate.outputPath, + Content: entry.Content, + } + } + return nil }) } - debugf(ctx, "generate.output_cache=hit") - return candidates, results, rootResult.Discovery, len(results) == len(rootResult.Packages) + if err := g.Wait(); err != nil { + debugf(ctx, "generate.output_cache=error err=%v", err) + return candidates, nil, rootResult.Discovery, false + } + + allHit := true + for i, pkg := range rootResult.Packages { + candidates[pkg.PkgPath] = entries[i].candidate + if entries[i].result != nil { + results = append(results, *entries[i].result) + } else { + debugf(ctx, "generate.output_cache=miss pkg=%s", pkg.PkgPath) + allHit = false + } + } + if allHit { + debugf(ctx, "generate.output_cache=hit") + return candidates, results, rootResult.Discovery, true + } + debugf(ctx, "generate.output_cache=partial_miss hit=%d total=%d", len(results), len(rootResult.Packages)) + return candidates, nil, rootResult.Discovery, false } func writeGenerateOutputCache(candidates map[string]outputCacheCandidate, generated []GenerateResult) { @@ -154,9 +189,96 @@ func writeOutputCache(path string, entry *outputCacheEntry) error { return gob.NewEncoder(f).Encode(entry) } -func outputCacheKey(wd string, opts *GenerateOptions, root *packages.Package) (string, error) { - sum := sha256.New() - sum.Write([]byte("wire-output-cache-v1\n")) +// fileHashCache memoizes file content hashes across multiple outputCacheKey calls +// within a single wire invocation. This avoids re-reading and re-hashing the same +// files when multiple targets share overlapping dependency graphs. +type fileHashCache struct { + mu sync.Mutex + hashes map[string]string +} + +func newFileHashCache() *fileHashCache { + return &fileHashCache{hashes: make(map[string]string)} +} + +func (c *fileHashCache) hash(path string) (string, error) { + c.mu.Lock() + if h, ok := c.hashes[path]; ok { + c.mu.Unlock() + return h, nil + } + c.mu.Unlock() + + data, err := os.ReadFile(path) + if err != nil { + return "", err + } + result := strconv.FormatUint(xxh3.Hash(data), 16) + + c.mu.Lock() + c.hashes[path] = result + c.mu.Unlock() + return result, nil +} + +// prefetch hashes all given file paths concurrently using a worker pool. +// After prefetch, subsequent hash() calls are instant cache lookups. +func (c *fileHashCache) prefetch(paths []string) error { + // filter out already cached paths + var todo []string + c.mu.Lock() + for _, p := range paths { + if _, ok := c.hashes[p]; !ok { + todo = append(todo, p) + } + } + c.mu.Unlock() + if len(todo) == 0 { + return nil + } + + workers := runtime.NumCPU() * 4 + if workers > 64 { + workers = 64 + } + if len(todo) < workers { + workers = len(todo) + } + + var ( + wg sync.WaitGroup + mu sync.Mutex + errs []error + jobs = make(chan string, len(todo)) + ) + for i := 0; i < workers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for path := range jobs { + if _, err := c.hash(path); err != nil { + mu.Lock() + errs = append(errs, err) + mu.Unlock() + } + } + }() + } + for _, p := range todo { + jobs <- p + } + close(jobs) + wg.Wait() + + if len(errs) > 0 { + return errs[0] + } + return nil +} + +func outputCacheKey(wd string, opts *GenerateOptions, root *packages.Package, fhc *fileHashCache) (string, error) { + sum := xxh3.New() + sum.Write([]byte("wire-output-cache-v3\n")) sum.Write([]byte(runtime.Version())) sum.Write([]byte{'\n'}) sum.Write([]byte(canonicalWirePath(wd))) @@ -176,31 +298,47 @@ func outputCacheKey(wd string, opts *GenerateOptions, root *packages.Package) (s files := append([]string(nil), pkg.GoFiles...) sort.Strings(files) for _, name := range files { - info, err := os.Stat(name) + h, err := fhc.hash(name) if err != nil { return "", err } sum.Write([]byte(name)) sum.Write([]byte{'\n'}) - sum.Write([]byte(strconv.FormatInt(info.Size(), 10))) + sum.Write([]byte(h)) sum.Write([]byte{'\n'}) - sum.Write([]byte(strconv.FormatInt(info.ModTime().UnixNano(), 10))) - sum.Write([]byte{'\n'}) - if pkg.PkgPath == root.PkgPath { - src, err := os.ReadFile(name) - if err != nil { - return "", err - } - sum.Write(src) - sum.Write([]byte{'\n'}) - } } continue } - sum.Write([]byte(pkg.ExportFile)) - sum.Write([]byte{'\n'}) + if pkg.ExportFile != "" { + h, err := fhc.hash(pkg.ExportFile) + if err != nil { + return "", err + } + sum.Write([]byte(h)) + sum.Write([]byte{'\n'}) + } } - return hex.EncodeToString(sum.Sum(nil)), nil + return strconv.FormatUint(sum.Sum64(), 16), nil +} + +func collectReachableFiles(workspace string, roots []*packages.Package) []string { + seen := make(map[string]struct{}) + for _, root := range roots { + for _, pkg := range reachablePackages(root) { + if isLocalWirePackage(workspace, pkg) { + for _, name := range pkg.GoFiles { + seen[name] = struct{}{} + } + } else if pkg.ExportFile != "" { + seen[pkg.ExportFile] = struct{}{} + } + } + } + files := make([]string, 0, len(seen)) + for f := range seen { + files = append(files, f) + } + return files } func reachablePackages(root *packages.Package) []*packages.Package { @@ -240,6 +378,14 @@ func isLocalWirePackage(workspace string, pkg *packages.Package) bool { return len(dir) > len(workspace) && dir[:len(workspace)] == workspace && dir[len(workspace)] == filepath.Separator } +func hashExportFile(path string) (string, error) { + data, err := os.ReadFile(path) + if err != nil { + return "", err + } + return strconv.FormatUint(xxh3.Hash(data), 16), nil +} + func detectWireModuleRoot(start string) string { start = canonicalWirePath(start) for dir := start; dir != "" && dir != "." && dir != string(filepath.Separator); dir = filepath.Dir(dir) {