Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 28 additions & 7 deletions spdxexp/extracts.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
package spdxexp

import (
"maps"
"slices"
)

// ExtractLicenses extracts licenses from the given expression without duplicates.
// Returns an array of licenses or error if error occurs during processing.
func ExtractLicenses(expression string) ([]string, error) {
Expand All @@ -8,14 +13,30 @@ func ExtractLicenses(expression string) ([]string, error) {
return nil, err
}

expanded := node.expand(true)
licenses := make([]string, 0)
allLicenses := flatten(expanded)
for _, licenseNode := range allLicenses {
licenses = append(licenses, *licenseNode.reconstructedLicenseString())
seen := map[string]struct{}{}
collectExtractedLicenses(node, seen)
return slices.Collect(maps.Keys(seen)), nil
Comment on lines +16 to +18
Copy link

Copilot AI Apr 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ExtractLicenses now returns slices.Collect(maps.Keys(seen)), which depends on Go map iteration order and can produce nondeterministic output across runs. This is a behavior change from the previous implementation (which produced a deterministic order after expand(true) + dedup) and can lead to flaky downstream assertions or unstable output.

Consider returning a deterministic order (e.g., collect keys into a slice and sort before returning, or maintain insertion order while deduping).

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The previous implementation did not have any sorting and would therefore also have been non-deterministic.

}

// collectExtractedLicenses walks the tree rooted at n and records every
// license it finds as a key of seen, which acts as a set.
//
// A nil node is ignored. For an expression node the function recurses into
// n.left() and n.right(). For any other node it adds the string returned by
// n.reconstructedLicenseString() to seen, unless that pointer is nil.
func collectExtractedLicenses(n *node, seen map[string]struct{}) {
if n == nil {
return
}

// NOTE(review): `licenses` is not declared in this function; this line looks
// like a removed line left over from the diff rendering — confirm.
licenses = removeDuplicateStrings(licenses)
if n.isExpression() {
collectExtractedLicenses(n.left(), seen)
collectExtractedLicenses(n.right(), seen)
return
}

// NOTE(review): this function has no result parameters; this line also looks
// like a removed line left over from the diff rendering — confirm.
return licenses, nil
reconstructed := n.reconstructedLicenseString()
if reconstructed == nil {
return
}

license := *reconstructed
// Already recorded: nothing to do.
if _, ok := seen[license]; ok {
return
}
seen[license] = struct{}{}
}
67 changes: 67 additions & 0 deletions spdxexp/extracts_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,49 @@
package spdxexp

import (
"context"
"os"
"os/exec"
"testing"
"time"

"github.com/stretchr/testify/assert"
)

// kernelHeadersLicense is a long license expression made of many AND-joined
// terms, several of them parenthesized OR groups. It is the input passed to
// ExtractLicenses by TestExtractLicensesLongExpressionDoesNotHang.
const kernelHeadersLicense = `(GPL-2.0-only WITH Linux-syscall-note OR BSD-2-Clause) AND (GPL-2.0-only WITH Linux-syscall-note OR BSD-3-Clause) AND (GPL-2.0-only WITH Linux-syscall-note OR CDDL-1.0) AND (GPL-2.0-only WITH Linux-syscall-note OR Linux-OpenIB) AND (GPL-2.0-only WITH Linux-syscall-note OR MIT) AND (GPL-2.0-or-later WITH Linux-syscall-note OR BSD-3-Clause) AND (GPL-2.0-or-later WITH Linux-syscall-note OR MIT) AND Apache-2.0 AND BSD-2-Clause AND BSD-3-Clause AND BSD-3-Clause-Clear AND GFDL-1.1-no-invariants-or-later AND GPL-1.0-or-later AND (GPL-1.0-or-later OR BSD-3-Clause) AND GPL-1.0-or-later WITH Linux-syscall-note AND GPL-2.0-only AND (GPL-2.0-only OR Apache-2.0) AND (GPL-2.0-only OR BSD-2-Clause) AND (GPL-2.0-only OR BSD-3-Clause) AND (GPL-2.0-only OR CDDL-1.0) AND (GPL-2.0-only OR GFDL-1.1-no-invariants-or-later) AND (GPL-2.0-only OR GFDL-1.2-no-invariants-only) AND GPL-2.0-only WITH Linux-syscall-note AND GPL-2.0-or-later AND (GPL-2.0-or-later OR BSD-2-Clause) AND (GPL-2.0-or-later OR BSD-3-Clause) AND (GPL-2.0-or-later OR CC-BY-4.0) AND GPL-2.0-or-later WITH GCC-exception-2.0 AND GPL-2.0-or-later WITH Linux-syscall-note AND ISC AND LGPL-2.0-or-later AND (LGPL-2.0-or-later OR BSD-2-Clause) AND LGPL-2.0-or-later WITH Linux-syscall-note AND LGPL-2.1-only AND (LGPL-2.1-only OR BSD-2-Clause) AND LGPL-2.1-only WITH Linux-syscall-note AND LGPL-2.1-or-later AND LGPL-2.1-or-later WITH Linux-syscall-note AND (Linux-OpenIB OR GPL-2.0-only) AND (Linux-OpenIB OR GPL-2.0-only OR BSD-2-Clause) AND Linux-man-pages-copyleft AND MIT AND (MIT OR GPL-2.0-only) AND (MIT OR GPL-2.0-or-later) AND (MIT OR LGPL-2.1-only) AND (MPL-1.1 OR GPL-2.0-only) AND (X11 OR GPL-2.0-only) AND (X11 OR GPL-2.0-or-later) AND Zlib AND (copyleft-next-0.3.1 OR GPL-2.0-or-later)`

// expectedKernelHeadersLicenses lists the licenses that ExtractLicenses is
// expected to return for kernelHeadersLicense. Each entry appears once, and
// entries with a WITH exception are kept as a single string. The test that
// uses this slice compares with assert.ElementsMatch, so the order of the
// entries is not significant.
var expectedKernelHeadersLicenses = []string{
"GPL-2.0-only WITH Linux-syscall-note",
"BSD-2-Clause",
"BSD-3-Clause",
"CDDL-1.0",
"Linux-OpenIB",
"MIT",
"GPL-2.0-or-later WITH Linux-syscall-note",
"Apache-2.0",
"BSD-3-Clause-Clear",
"GFDL-1.1-no-invariants-or-later",
"GPL-1.0-or-later",
"GPL-1.0-or-later WITH Linux-syscall-note",
"GPL-2.0-only",
"GFDL-1.2-no-invariants-only",
"GPL-2.0-or-later",
"CC-BY-4.0",
"GPL-2.0-or-later WITH GCC-exception-2.0",
"ISC",
"LGPL-2.0-or-later",
"LGPL-2.0-or-later WITH Linux-syscall-note",
"LGPL-2.1-only",
"LGPL-2.1-only WITH Linux-syscall-note",
"LGPL-2.1-or-later",
"LGPL-2.1-or-later WITH Linux-syscall-note",
"Linux-man-pages-copyleft",
"MPL-1.1",
"X11",
"Zlib",
"copyleft-next-0.3.1",
}

func TestExtractLicenses(t *testing.T) {
tests := []struct {
name string
Expand Down Expand Up @@ -35,3 +73,32 @@ func TestExtractLicenses(t *testing.T) {
})
}
}

// TestExtractLicensesLicenseRefAndDedup checks that ExtractLicenses handles
// LicenseRef and DocumentRef identifiers and reports a license that occurs
// more than once in the expression only once. The comparison ignores order.
func TestExtractLicensesLicenseRefAndDedup(t *testing.T) {
	const expression = "(LicenseRef-custom OR LicenseRef-custom) AND (DocumentRef-spdx-tool-1.2:LicenseRef-custom OR MIT)"
	want := []string{
		"LicenseRef-custom",
		"DocumentRef-spdx-tool-1.2:LicenseRef-custom",
		"MIT",
	}

	got, err := ExtractLicenses(expression)

	assert.NoError(t, err)
	assert.ElementsMatch(t, want, got)
}

// TestExtractLicensesLongExpressionDoesNotHang checks that ExtractLicenses
// finishes on the long kernelHeadersLicense expression within a time limit.
//
// The test runs in two modes. The parent re-executes the current test binary,
// restricted to this test, with GO_SPDX_EXTRACT_LICENSES_LONG_CHILD=1 in the
// environment and a 5-second context. The child, which sees that variable,
// performs the extraction and verifies the result. If the context deadline is
// exceeded, or the child exits with an error, the parent fails the test and
// includes the child's combined output.
func TestExtractLicensesLongExpressionDoesNotHang(t *testing.T) {
	// Child mode: do the real work and return.
	if os.Getenv("GO_SPDX_EXTRACT_LICENSES_LONG_CHILD") == "1" {
		got, extractErr := ExtractLicenses(kernelHeadersLicense)
		assert.NoError(t, extractErr)
		assert.ElementsMatch(t, expectedKernelHeadersLicenses, got)
		return
	}

	// Parent mode: bound the child's run time with a context deadline.
	ctx, stop := context.WithTimeout(context.Background(), 5*time.Second)
	defer stop()

	// #nosec G204 G702 -- safe in tests: re-executes current test binary with fixed arg
	child := exec.CommandContext(ctx, os.Args[0], "-test.run", "^TestExtractLicensesLongExpressionDoesNotHang$")
	child.Env = append(os.Environ(), "GO_SPDX_EXTRACT_LICENSES_LONG_CHILD=1")
	combined, runErr := child.CombinedOutput()

	// Check the timeout first so it is reported as such rather than as a
	// generic child failure.
	switch {
	case ctx.Err() == context.DeadlineExceeded:
		t.Fatalf("ExtractLicenses timed out on long expression: %s", combined)
	case runErr != nil:
		t.Fatalf("child process failed: %v\n%s", runErr, combined)
	}
}
24 changes: 0 additions & 24 deletions spdxexp/helpers.go

This file was deleted.

Loading