Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion SPECS/kubevirt/kubevirt.spec
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,15 @@
Summary: Container native virtualization
Name: kubevirt
Version: 1.7.1
Release: 2%{?dist}
Release: 3%{?dist}
License: ASL 2.0
Vendor: Microsoft Corporation
Distribution: Azure Linux
Group: System/Management
URL: https://github.com/kubevirt/kubevirt
Source0: https://github.com/kubevirt/kubevirt/archive/refs/tags/v%{version}.tar.gz#/%{name}-%{version}.tar.gz
Patch0: CVE-2025-11065.patch
Patch1: vfio-pci-fix.patch

%global debug_package %{nil}
BuildRequires: swtpm-tools
Expand Down Expand Up @@ -265,6 +266,9 @@ install -p -m 0644 cmd/virt-launcher/qemu.conf %{buildroot}%{_datadir}/kube-virt
%{_bindir}/virt-tests

%changelog
* Thu Apr 23 2026 Woojoong Kim <woojoongkim@microsoft.com> - 1.7.1-3
- Add patch to fix VFIO PCI passthrough devices being lost after hotplugging a block volume (kubevirt/kubevirt#17124)

* Wed Mar 25 2026 Aditya Singh <v-aditysing@microsoft.com> - 1.7.1-2
- Bump to rebuild with updated glibc

Expand Down
368 changes: 368 additions & 0 deletions SPECS/kubevirt/vfio-pci-fix.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,368 @@
From 3c576ab7e84f9c8ea8cc89691df6bc001b5d5dee Mon Sep 17 00:00:00 2001
From: Michael Henriksen <mhenriks@redhat.com>
Date: Fri, 17 Apr 2026 23:29:54 -0400
Subject: [PATCH] Fix VM with PCI hostdev failing to restart after hotplug
block volume

When a hotplug block volume is mounted into the virt-launcher pod,
allowBlockMajorMinor() calls cgroupManager.Set() to add the block
device to the cgroup allowlist. On cgroups v2, this replaces the
entire eBPF device filter program. The v2Manager rebuilds the program
from its in-memory rule cache, which is initialized from
generateDeviceRulesForVMI() and does not include devices provisioned
by device plugins. This wipes access to device-plugin-provided nodes
such as /dev/vfio/* (PCI/MDEV/GPU/SR-IOV passthrough) and
/dev/bus/usb/* (USB passthrough), causing libvirt to fail with
"pci backend driver type 'default' is not supported" when starting
the domain.

Fix by recursively scanning /dev/vfio/ and /dev/bus/usb/ inside the
container and including all discovered device rules in the initial
cache so they are preserved when the eBPF program is rebuilt.

Fixes: https://github.com/kubevirt/kubevirt/issues/17124

Signed-off-by: Michael Henriksen <mhenriks@redhat.com>
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Signed-off-by: Michael Henriksen <mhenriks@redhat.com>
---
pkg/virt-handler/cgroup/BUILD.bazel | 3 +
pkg/virt-handler/cgroup/cgroup_test.go | 50 ++++++++++++++
pkg/virt-handler/cgroup/util.go | 95 +++++++++++++++++++++++---
tests/storage/hotplug.go | 77 +++++++++++++++++++++
4 files changed, 214 insertions(+), 11 deletions(-)

diff --git a/pkg/virt-handler/cgroup/BUILD.bazel b/pkg/virt-handler/cgroup/BUILD.bazel
index ace69f1d78..4f4ec95714 100644
--- a/pkg/virt-handler/cgroup/BUILD.bazel
+++ b/pkg/virt-handler/cgroup/BUILD.bazel
@@ -40,6 +40,9 @@ go_test(
embed = [":go_default_library"],
race = "on",
deps = [
+ "//pkg/safepath:go_default_library",
+ "//pkg/virt-handler/isolation:go_default_library",
+ "//staging/src/kubevirt.io/api/core/v1:go_default_library",
"//staging/src/kubevirt.io/client-go/testutils:go_default_library",
"//vendor/github.com/onsi/ginkgo/v2:go_default_library",
"//vendor/github.com/onsi/gomega:go_default_library",
diff --git a/pkg/virt-handler/cgroup/cgroup_test.go b/pkg/virt-handler/cgroup/cgroup_test.go
index 50b5198e2a..53450e2a06 100644
--- a/pkg/virt-handler/cgroup/cgroup_test.go
+++ b/pkg/virt-handler/cgroup/cgroup_test.go
@@ -20,12 +20,20 @@
package cgroup

import (
+ "os"
+ "path/filepath"
+
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
runc_cgroups "github.com/opencontainers/runc/libcontainer/cgroups"
runc_configs "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
"go.uber.org/mock/gomock"
+
+ v1 "kubevirt.io/api/core/v1"
+
+ "kubevirt.io/kubevirt/pkg/safepath"
+ "kubevirt.io/kubevirt/pkg/virt-handler/isolation"
)

var _ = Describe("cgroup manager", func() {
@@ -195,3 +203,45 @@ var _ = Describe("cgroup manager", func() {
),
)
})
+
+var _ = Describe("generateDeviceRulesForVMI", func() {
+ var (
+ ctrl *gomock.Controller
+ tempDir string
+ )
+
+ BeforeEach(func() {
+ ctrl = gomock.NewController(GinkgoT())
+ tempDir = GinkgoT().TempDir()
+ Expect(os.MkdirAll(filepath.Join(tempDir, "dev"), 0755)).To(Succeed())
+ })
+
+ newMockIsolationWithMountRoot := func() isolation.IsolationResult {
+ mountRoot, err := safepath.NewPathNoFollow(tempDir)
+ Expect(err).ToNot(HaveOccurred())
+
+ mockIso := isolation.NewMockIsolationResult(ctrl)
+ mockIso.EXPECT().MountRoot().Return(mountRoot, nil)
+ return mockIso
+ }
+
+ It("should not fail when /dev/vfio does not exist", func() {
+ rules, err := generateDeviceRulesForVMI(&v1.VirtualMachineInstance{}, newMockIsolationWithMountRoot(), "")
+ Expect(err).ToNot(HaveOccurred())
+ Expect(rules).To(BeEmpty())
+ })
+
+ It("should not fail when /dev/vfio exists but is empty", func() {
+ Expect(os.MkdirAll(filepath.Join(tempDir, "dev", "vfio"), 0755)).To(Succeed())
+ rules, err := generateDeviceRulesForVMI(&v1.VirtualMachineInstance{}, newMockIsolationWithMountRoot(), "")
+ Expect(err).ToNot(HaveOccurred())
+ Expect(rules).To(BeEmpty())
+ })
+
+ It("should not fail when /dev/bus/usb exists but is empty", func() {
+ Expect(os.MkdirAll(filepath.Join(tempDir, "dev", "bus", "usb"), 0755)).To(Succeed())
+ rules, err := generateDeviceRulesForVMI(&v1.VirtualMachineInstance{}, newMockIsolationWithMountRoot(), "")
+ Expect(err).ToNot(HaveOccurred())
+ Expect(rules).To(BeEmpty())
+ })
+})
diff --git a/pkg/virt-handler/cgroup/util.go b/pkg/virt-handler/cgroup/util.go
index 892113c83d..be115bad14 100644
--- a/pkg/virt-handler/cgroup/util.go
+++ b/pkg/virt-handler/cgroup/util.go
@@ -59,6 +59,9 @@ const (
V2 CgroupVersion = "v2"

loggingVerbosity = 2
+
+ rwmPermissions = "rwm"
+ rwPermissions = "rw"
)

var (
@@ -126,6 +129,18 @@ func getSourceBlockToFsMigratedVolumes(vmi *v1.VirtualMachineInstance, host stri
return vols
}

+func getDevicePermissionsFromCgroups() devices.Permissions {
+ if cgroups.IsCgroup2UnifiedMode() {
+ return rwmPermissions
+ } else {
+ return rwPermissions
+ }
+}
+
+func getDeviceRwmPermissions() devices.Permissions {
+ return rwmPermissions
+}
+
// This builds up the known persistent block devices allow list for a VMI (as in, hotplugged volumes are handled separately)
// This will be maintained and extended as new devices likely have to end up on this list as well
// For example - https://kubernetes.io/docs/concepts/scheduling-eviction/dynamic-resource-allocation/
@@ -159,7 +174,7 @@ func generateDeviceRulesForVMI(vmi *v1.VirtualMachineInstance, isolationRes isol
}
return nil, fmt.Errorf("failed to resolve path for volume %s: %v", volume.Name, err)
}
- if deviceRule, err := newAllowedDeviceRule(path); err != nil {
+ if deviceRule, err := newAllowedDeviceRule(path, getDeviceRwmPermissions()); err != nil {
return nil, fmt.Errorf("failed to create device rule for %s: %v", path, err)
} else if deviceRule != nil {
log.Log.V(loggingVerbosity).Infof("device rule for volume %s: %v", volume.Name, deviceRule)
@@ -171,7 +186,7 @@ func generateDeviceRulesForVMI(vmi *v1.VirtualMachineInstance, isolationRes isol
if err != nil {
return nil, err
}
- if deviceRule, err := newAllowedDeviceRule(path); err != nil {
+ if deviceRule, err := newAllowedDeviceRule(path, getDeviceRwmPermissions()); err != nil {
return nil, fmt.Errorf("failed to create device rule for %s: %v", path, err)
} else if deviceRule != nil {
log.Log.V(loggingVerbosity).Infof("device rule for volume rng: %v", deviceRule)
@@ -183,7 +198,7 @@ func generateDeviceRulesForVMI(vmi *v1.VirtualMachineInstance, isolationRes isol
if err != nil {
return nil, err
}
- if deviceRule, err := newAllowedDeviceRule(path); err != nil {
+ if deviceRule, err := newAllowedDeviceRule(path, getDeviceRwmPermissions()); err != nil {
return nil, fmt.Errorf("failed to create device rule for %s: %v", path, err)
} else if deviceRule != nil {
log.Log.V(loggingVerbosity).Infof("device rule for volume vsock: %v", deviceRule)
@@ -191,10 +206,73 @@ func generateDeviceRulesForVMI(vmi *v1.VirtualMachineInstance, isolationRes isol
}
}

+ // Device-plugin-provisioned devices (VFIO, USB) must be in the cgroup
+ // rule cache so they survive eBPF program rebuilds during hotplug.
+ for _, devDir := range []string{
+ filepath.Join("dev", "vfio"),
+ filepath.Join("dev", "bus", "usb"),
+ } {
+ rules, err := discoverDeviceRulesInDir(mountRoot, devDir)
+ if err != nil {
+ return nil, fmt.Errorf("failed to discover device rules in %s: %v", devDir, err)
+ }
+ vmiDeviceRules = append(vmiDeviceRules, rules...)
+ }
+
return vmiDeviceRules, nil
}

-func newAllowedDeviceRule(devicePath *safepath.Path) (*devices.Rule, error) {
+// discoverDeviceRulesInDir recursively scans a directory under the
+// container's filesystem and creates allow rules for all device nodes
+// found. These devices are provisioned by device plugins or the container
+// runtime and must be preserved in the v2 cgroup manager's rule cache so
+// they are not lost when the eBPF device filter is rebuilt by subsequent
+// Set() calls (e.g. during hotplug volume mounting).
+func discoverDeviceRulesInDir(mountRoot *safepath.Path, relPath string) ([]*devices.Rule, error) {
+ dirPath, err := safepath.JoinNoFollow(mountRoot, relPath)
+ if err != nil {
+ if errors.Is(err, os.ErrNotExist) {
+ return nil, nil
+ }
+ return nil, err
+ }
+
+ var entries []os.DirEntry
+ err = dirPath.ExecuteNoFollow(func(path string) (err error) {
+ entries, err = os.ReadDir(path)
+ return err
+ })
+ if err != nil {
+ return nil, err
+ }
+
+ var rules []*devices.Rule
+ for _, entry := range entries {
+ if entry.IsDir() {
+ subRules, err := discoverDeviceRulesInDir(mountRoot, filepath.Join(relPath, entry.Name()))
+ if err != nil {
+ return nil, err
+ }
+ rules = append(rules, subRules...)
+ continue
+ }
+ devPath, err := safepath.JoinNoFollow(dirPath, entry.Name())
+ if err != nil {
+ return nil, err
+ }
+ rule, err := newAllowedDeviceRule(devPath, getDeviceRwmPermissions())
+ if err != nil {
+ return nil, fmt.Errorf("failed to create device rule for %s/%s: %v", relPath, entry.Name(), err)
+ }
+ if rule != nil {
+ log.Log.V(loggingVerbosity).Infof("device rule for %s/%s: %v", relPath, entry.Name(), rule)
+ rules = append(rules, rule)
+ }
+ }
+ return rules, nil
+}
+
+func newAllowedDeviceRule(devicePath *safepath.Path, devicePermissions devices.Permissions) (*devices.Rule, error) {
fileInfo, err := safepath.StatAtNoFollow(devicePath)
if err != nil {
return nil, err
@@ -211,7 +289,7 @@ func newAllowedDeviceRule(devicePath *safepath.Path) (*devices.Rule, error) {
Type: deviceType,
Major: int64(unix.Major(stat.Rdev)),
Minor: int64(unix.Minor(stat.Rdev)),
- Permissions: "rwm",
+ Permissions: devicePermissions,
Allow: true,
}, nil
}
@@ -224,12 +302,7 @@ func GenerateDefaultDeviceRules() []*devices.Rule {

const toAllow = true

- var permissions devices.Permissions
- if cgroups.IsCgroup2UnifiedMode() {
- permissions = "rwm"
- } else {
- permissions = "rw"
- }
+ permissions := getDevicePermissionsFromCgroups()

defaultRules := []*devices.Rule{
{ // /dev/ptmx (PTY master multiplex)
diff --git a/tests/storage/hotplug.go b/tests/storage/hotplug.go
index 2a40430c29..cc3567386f 100644
--- a/tests/storage/hotplug.go
+++ b/tests/storage/hotplug.go
@@ -2275,6 +2275,83 @@ var _ = Describe(SIG("Hotplug", func() {
verifyVolumeNolongerAccessible(vmi, targets[0])
})
})
+
+ // Regression test for https://github.com/kubevirt/kubevirt/issues/17124
+ Context("with PCI hostdev", Serial, func() {
+ const deviceName = "example.org/soundcard"
+
+ BeforeEach(func() {
+ kvconfig.EnableFeatureGate(featuregate.HostDevicesGate)
+
+ kv := libkubevirt.GetCurrentKv(virtClient)
+ config := kv.Spec.Configuration
+ config.PermittedHostDevices = &v1.PermittedHostDevices{
+ PciHostDevices: []v1.PciHostDevice{
+ {
+ PCIVendorSelector: "8086:2668",
+ ResourceName: deviceName,
+ },
+ },
+ }
+ kvconfig.UpdateKubeVirtConfigValueAndWait(config)
+ })
+
+ AfterEach(func() {
+ kv := libkubevirt.GetCurrentKv(virtClient)
+ config := kv.Spec.Configuration
+ config.PermittedHostDevices = &v1.PermittedHostDevices{}
+ kvconfig.UpdateKubeVirtConfigValueAndWait(config)
+ kvconfig.DisableFeatureGate(featuregate.HostDevicesGate)
+ })
+
+ It("should restart a VM after hotplugging a block volume", func() {
+ sc, exists := libstorage.GetRWOBlockStorageClass()
+ if !exists {
+ Skip("no RWO block storage class available")
+ }
+
+ vmiSpec := libvmifact.NewAlpineWithTestTooling()
+ vmiSpec.Spec.Domain.Devices.HostDevices = []v1.HostDevice{
+ {Name: "sound0", DeviceName: deviceName},
+ }
+ vm, err := virtClient.VirtualMachine(testsuite.GetTestNamespace(nil)).Create(
+ context.Background(),
+ libvmi.NewVirtualMachine(vmiSpec, libvmi.WithRunStrategy(v1.RunStrategyAlways)),
+ metav1.CreateOptions{},
+ )
+ Expect(err).ToNot(HaveOccurred())
+ Eventually(matcher.ThisVM(vm)).WithTimeout(300 * time.Second).WithPolling(time.Second).Should(matcher.BeReady())
+
+ vmi, err := virtClient.VirtualMachineInstance(vm.Namespace).Get(context.Background(), vm.Name, metav1.GetOptions{})
+ Expect(err).ToNot(HaveOccurred())
+ libwait.WaitForSuccessfulVMIStart(vmi, libwait.WithTimeout(240))
+
+ dvBuilder := libdv.NewDataVolume(
+ libdv.WithBlankImageSource(),
+ libdv.WithStorage(
+ libdv.StorageWithStorageClass(sc),
+ libdv.StorageWithVolumeSize(cd.BlankVolumeSize),
+ libdv.StorageWithVolumeMode(k8sv1.PersistentVolumeBlock),
+ ),
+ )
+ dv, err := virtClient.CdiClient().CdiV1beta1().DataVolumes(testsuite.GetTestNamespace(nil)).Create(
+ context.Background(), dvBuilder, metav1.CreateOptions{})
+ Expect(err).ToNot(HaveOccurred())
+ libstorage.EventuallyDV(dv, 240, Or(matcher.HaveSucceeded(), matcher.WaitForFirstConsumer()))
+
+ By("Hotplugging a block volume to the running VM")
+ addVolumeVMWithSource(vm.Name, vm.Namespace, getAddVolumeOptions("hotplug-vol", v1.DiskBusSCSI, &v1.HotplugVolumeSource{
+ DataVolume: &v1.DataVolumeSource{Name: dv.Name},
+ }, false, false, ""))
+ verifyVolumeStatus(vmi, v1.VolumeReady, "", "hotplug-vol")
+
+ By("Restarting the VM")
+ vm = libvmops.StopVirtualMachine(vm)
+ err = virtClient.VirtualMachine(vm.Namespace).Start(context.Background(), vm.Name, &v1.StartOptions{})
+ Expect(err).ToNot(HaveOccurred())
+ Eventually(matcher.ThisVM(vm), 300*time.Second, time.Second).Should(matcher.BeReady())
+ })
+ })
}))

func verifyVolumeAndDiskVMAdded(virtClient kubecli.KubevirtClient, vm *v1.VirtualMachine, volumeNames ...string) {
--
2.34.1

Loading
Loading