From dc85f47b0d762f6918af27794076e387f4a76ba4 Mon Sep 17 00:00:00 2001
From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
Date: Tue, 21 Apr 2026 16:03:06 +0200
Subject: [PATCH 01/29] lxc/cgroups: drop cgroup1 freezer support

Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
---
 src/lxc/cgroups/cgfsng.c | 32 ++++----------------------------
 1 file changed, 4 insertions(+), 28 deletions(-)

diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index fcaea291fc..c92473a43f 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -2576,18 +2576,6 @@ __cgfsng_ops static bool cgfsng_criu_get_hierarchies(struct cgroup_ops *ops,
 	return true;
 }
 
-static int cg_legacy_freeze(struct cgroup_ops *ops)
-{
-	struct hierarchy *h;
-
-	h = get_hierarchy(ops, "freezer");
-	if (!h)
-		return ret_set_errno(-1, ENOENT);
-
-	return lxc_write_openat(h->path_con, "freezer.state",
-				"FROZEN", STRLITERALLEN("FROZEN"));
-}
-
 static int freezer_cgroup_events_cb(int fd, uint32_t events, void *cbdata,
 				    struct lxc_async_descr *descr)
 {
@@ -2680,24 +2668,12 @@ __cgfsng_ops static int cgfsng_freeze(struct cgroup_ops *ops, int timeout)
 	if (!ops->hierarchies)
 		return ret_set_errno(-1, ENOENT);
 
-	if (ops->cgroup_layout != CGROUP_LAYOUT_UNIFIED)
-		return cg_legacy_freeze(ops);
+	if (!pure_unified_layout(ops))
+		return ret_set_errno(-1, EOPNOTSUPP);
 
 	return cg_unified_freeze(ops, timeout);
 }
 
-static int cg_legacy_unfreeze(struct cgroup_ops *ops)
-{
-	struct hierarchy *h;
-
-	h = get_hierarchy(ops, "freezer");
-	if (!h)
-		return ret_set_errno(-1, ENOENT);
-
-	return lxc_write_openat(h->path_con, "freezer.state",
-				"THAWED", STRLITERALLEN("THAWED"));
-}
-
 static int cg_unified_unfreeze(struct cgroup_ops *ops, int timeout)
 {
 	return cg_unified_freeze_do(ops, timeout, "0", 0,
@@ -2710,8 +2686,8 @@ __cgfsng_ops static int cgfsng_unfreeze(struct cgroup_ops *ops, int timeout)
 	if (!ops->hierarchies)
 		return ret_set_errno(-1, ENOENT);
 
-	if (ops->cgroup_layout != CGROUP_LAYOUT_UNIFIED)
-		return cg_legacy_unfreeze(ops);
+	if (!pure_unified_layout(ops))
+		return ret_set_errno(-1, EOPNOTSUPP);
 
 	return cg_unified_unfreeze(ops, timeout);
 }

From 8c8da5ed3343e63dea9f4c09f82c437236d3f10e Mon Sep 17 00:00:00 2001
From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
Date: Tue, 21 Apr 2026 16:05:14 +0200
Subject: [PATCH 02/29] lxc/cgroup: drop cgroup1 device cgroup support

Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
---
 src/lxc/cgroups/cgfsng.c | 140 ---------------------------------------
 src/lxc/cgroups/cgroup.h |   2 -
 src/lxc/start.c          |  16 -----
 3 files changed, 158 deletions(-)

diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index c92473a43f..557b0cd32d 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -791,16 +791,6 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
 		TRACE("Created limit cgroup %d->%d(%s)",
 		      h->dfd_lim, h->dfd_base, cgroup_limit_dir);
 
-		/*
-		 * With isolation the devices legacy cgroup needs to be
-		 * iinitialized early, as it typically contains an 'a' (all)
-		 * line, which is not possible once a subdirectory has been
-		 * created.
-		 */
-		if (string_in_list(h->controllers, "devices") &&
-		    !ops->setup_limits_legacy(ops, conf, true))
-			return log_warn(false, "Failed to setup legacy device limits");
-
 		/*
 		 * If we use a separate limit cgroup, the leaf cgroup, i.e. the
 		 * cgroup the container actually resides in, is below fd_limit.
@@ -3346,135 +3336,6 @@ static int device_cgroup_rule_parse_devpath(struct device_item *device,
 	return 0;
 }
 
-static int convert_devpath(const char *invalue, char *dest)
-{
-	struct device_item device = {};
-	int ret;
-
-	ret = device_cgroup_rule_parse_devpath(&device, invalue);
-	if (ret < 0)
-		return -1;
-
-	ret = strnprintf(dest, 50, "%c %d:%d %s", device.type, device.major,
-			 device.minor, device.access);
-	if (ret < 0)
-		return log_error_errno(ret, -ret,
-				       "Error on configuration value \"%c %d:%d %s\" (max 50 chars)",
-				       device.type, device.major, device.minor,
-				       device.access);
-
-	return 0;
-}
-
-/* Called from setup_limits - here we have the container's cgroup_data because
- * we created the cgroups.
- */
-static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
-			      const char *value, bool is_cpuset)
-{
-	__do_free char *controller = NULL;
-	char *p;
-	/* "b|c <2^64-1>:<2^64-1> r|w|m" = 47 chars max */
-	char converted_value[50];
-	struct hierarchy *h;
-
-	controller = strdup(filename);
-	if (!controller)
-		return ret_errno(ENOMEM);
-
-	p = strchr(controller, '.');
-	if (p)
-		*p = '\0';
-
-	if (strequal("devices.allow", filename) && value[0] == '/') {
-		int ret;
-
-		ret = convert_devpath(value, converted_value);
-		if (ret < 0)
-			return ret;
-		value = converted_value;
-	}
-
-	h = get_hierarchy(ops, controller);
-	if (!h)
-		return log_error_errno(-ENOENT, ENOENT, "Failed to setup limits for the \"%s\" controller. The controller seems to be unused by \"cgfsng\" cgroup driver or not enabled on the cgroup hierarchy", controller);
-
-	if (is_cpuset) {
-		int ret = lxc_write_openat(h->path_con, filename, value, strlen(value));
-		if (ret)
-			return ret;
-	}
-	return lxc_write_openat(h->path_lim, filename, value, strlen(value));
-}
-
-/*
- * Return the list of cgroup_settings sorted according to the following rules
- * 1. Put memory.limit_in_bytes before memory.memsw.limit_in_bytes
- */
-static void sort_cgroup_settings(struct lxc_conf *conf)
-{
-	LIST_HEAD(memsw_list);
-	struct lxc_cgroup *cgroup, *ncgroup;
-
-	/* Iterate over the cgroup settings and copy them to the output list. */
-	list_for_each_entry_safe(cgroup, ncgroup, &conf->cgroup, head) {
-		if (!strequal(cgroup->subsystem, "memory.memsw.limit_in_bytes"))
-			continue;
-
-		/* Move the memsw entry from the cgroup settings list. */
-		list_move_tail(&cgroup->head, &memsw_list);
-	}
-
-	/*
-	 * Append all the memsw entries to the end of the cgroup settings list
-	 * to make sure they are applied after all memory limit settings.
-	 */
-	list_splice_tail(&memsw_list, &conf->cgroup);
-
-}
-
-__cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
-						    struct lxc_conf *conf,
-						    bool do_devices)
-{
-	struct list_head *cgroup_settings;
-	struct lxc_cgroup *cgroup;
-
-	if (!ops)
-		return ret_set_errno(false, ENOENT);
-
-	if (!conf)
-		return ret_set_errno(false, EINVAL);
-
-	cgroup_settings = &conf->cgroup;
-	if (list_empty(cgroup_settings))
-		return true;
-
-	if (!ops->hierarchies)
-		return ret_set_errno(false, EINVAL);
-
-	if (pure_unified_layout(ops))
-		return log_warn_errno(true, EINVAL, "Ignoring legacy cgroup limits on pure cgroup2 system");
-
-	sort_cgroup_settings(conf);
-	list_for_each_entry(cgroup, cgroup_settings, head) {
-		if (do_devices == strnequal("devices", cgroup->subsystem, 7)) {
-			if (cg_legacy_set_data(ops, cgroup->subsystem, cgroup->value, strnequal("cpuset", cgroup->subsystem, 6))) {
-				if (do_devices && (errno == EACCES || errno == EPERM)) {
-					SYSWARN("Failed to set \"%s\" to \"%s\"", cgroup->subsystem, cgroup->value);
-					continue;
-				}
-				SYSERROR("Failed to set \"%s\" to \"%s\"", cgroup->subsystem, cgroup->value);
-				return false;
-			}
-			DEBUG("Set controller \"%s\" set to \"%s\"", cgroup->subsystem, cgroup->value);
-		}
-	}
-
-	INFO("Limits for the legacy cgroup hierarchies have been setup");
-	return true;
-}
-
 /*
  * Some of the parsing logic comes from the original cgroup device v1
  * implementation in the kernel.
@@ -4185,7 +4046,6 @@ struct cgroup_ops *cgroup_ops_init(struct lxc_conf *conf)
 	cgfsng_ops->set 				= cgfsng_set;
 	cgfsng_ops->freeze				= cgfsng_freeze;
 	cgfsng_ops->unfreeze				= cgfsng_unfreeze;
-	cgfsng_ops->setup_limits_legacy			= cgfsng_setup_limits_legacy;
 	cgfsng_ops->setup_limits			= cgfsng_setup_limits;
 	cgfsng_ops->driver				= "cgfsng";
 	cgfsng_ops->version				= "1.0.0";
diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
index 108e5d84ec..54c34530b9 100644
--- a/src/lxc/cgroups/cgroup.h
+++ b/src/lxc/cgroups/cgroup.h
@@ -262,8 +262,6 @@ struct cgroup_ops {
 		   size_t len, const char *name, const char *lxcpath);
 	int (*freeze)(struct cgroup_ops *ops, int timeout);
 	int (*unfreeze)(struct cgroup_ops *ops, int timeout);
-	bool (*setup_limits_legacy)(struct cgroup_ops *ops,
-				    struct lxc_conf *conf, bool with_devices);
 	bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_handler *handler);
 	bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf);
 	bool (*attach)(struct cgroup_ops *ops, const struct lxc_conf *conf,
diff --git a/src/lxc/start.c b/src/lxc/start.c
index c64e78e93f..7f269805e0 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -2020,11 +2020,6 @@ static int lxc_spawn(struct lxc_handler *handler)
 		}
 	}
 
-	if (!cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, false)) {
-		ERROR("Failed to setup cgroup limits for container \"%s\"", name);
-		goto out_delete_net;
-	}
-
 	if (!cgroup_ops->payload_delegate_controllers(cgroup_ops)) {
 		ERROR("Failed to delegate controllers to payload cgroup");
 		goto out_delete_net;
@@ -2113,17 +2108,6 @@ static int lxc_spawn(struct lxc_handler *handler)
 	if (!lxc_sync_wait_child(handler, START_SYNC_CGROUP_LIMITS))
 		goto out_delete_net;
 
-	/*
-	 * With isolation the limiting devices cgroup was already setup, so
-	 * only setup devices here if we have no namespace directory.
-	 */
-	if (!handler->conf->cgroup_meta.namespace_dir &&
-	    !cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, true)) {
-		ERROR("Failed to setup legacy device cgroup controller limits");
-		goto out_delete_net;
-	}
-	TRACE("Set up legacy device cgroup controller limits");
-
 	if (!cgroup_ops->devices_activate(cgroup_ops, handler)) {
 		ERROR("Failed to setup cgroup2 device controller limits");
 		goto out_delete_net;

From b4e57865561884baa6af2224c2d0e7a3cad9a3ed Mon Sep 17 00:00:00 2001
From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
Date: Tue, 21 Apr 2026 16:22:49 +0200
Subject: [PATCH 03/29] lxc/cgroups: drop special handling logic for cgroup1
 cpuset controller

Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
---
 src/lxc/cgroups/cgfsng.c | 278 +--------------------------------------
 1 file changed, 6 insertions(+), 272 deletions(-)

diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index 557b0cd32d..cf27b86c57 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -184,138 +184,6 @@ int prepare_cgroup_fd(const struct cgroup_ops *ops, struct cgroup_fd *fd, bool l
 	return 0;
 }
 
-/* Create cpumask from cpulist aka turn:
- *
- *	0,2-3
- *
- * into bit array
- *
- *	1 0 1 1
- */
-static int lxc_cpumask(char *buf, __u32 **bitarr, __u32 *last_set_bit)
-{
-	__do_free __u32 *arr_u32 = NULL;
-	__u32 cur_last_set_bit = 0, nbits = 256;
-	__u32 nr_u32;
-	char *token;
-
-	nr_u32 = BITS_TO_LONGS(nbits);
-	arr_u32 = zalloc(nr_u32 * sizeof(__u32));
-	if (!arr_u32)
-		return ret_errno(ENOMEM);
-
-	lxc_iterate_parts(token, buf, ",") {
-		__u32 last_bit, first_bit;
-		char *range;
-
-		errno = 0;
-		first_bit = strtoul(token, NULL, 0);
-		last_bit = first_bit;
-		range = strchr(token, '-');
-		if (range)
-			last_bit = strtoul(range + 1, NULL, 0);
-
-		if (!(first_bit <= last_bit))
-			return ret_errno(EINVAL);
-
-		if (last_bit >= nbits) {
-			__u32 add_bits = last_bit - nbits + 32;
-			__u32 new_nr_u32;
-			__u32 *p;
-
-			new_nr_u32 = BITS_TO_LONGS(nbits + add_bits);
-			p = realloc(arr_u32, new_nr_u32 * sizeof(uint32_t));
-			if (!p)
-				return ret_errno(ENOMEM);
-			arr_u32 = move_ptr(p);
-
-			memset(arr_u32 + nr_u32, 0,
-			       (new_nr_u32 - nr_u32) * sizeof(uint32_t));
-			nbits += add_bits;
-		}
-
-		while (first_bit <= last_bit)
-			set_bit(first_bit++, arr_u32);
-
-		if (last_bit > cur_last_set_bit)
-			cur_last_set_bit = last_bit;
-	}
-
-	*last_set_bit = cur_last_set_bit;
-	*bitarr = move_ptr(arr_u32);
-	return 0;
-}
-
-static int lxc_cpumask_update(char *buf, __u32 *bitarr, __u32 last_set_bit,
-			      bool clear)
-{
-	bool flipped = false;
-	char *token;
-
-	lxc_iterate_parts(token, buf, ",") {
-		__u32 last_bit, first_bit;
-		char *range;
-
-		errno = 0;
-		first_bit = strtoul(token, NULL, 0);
-		last_bit = first_bit;
-		range = strchr(token, '-');
-		if (range)
-			last_bit = strtoul(range + 1, NULL, 0);
-
-		if (!(first_bit <= last_bit)) {
-			WARN("The cup range seems to be inverted: %u-%u", first_bit, last_bit);
-			continue;
-		}
-
-		if (last_bit > last_set_bit)
-			continue;
-
-		while (first_bit <= last_bit) {
-			if (clear && is_set(first_bit, bitarr)) {
-				flipped = true;
-				clear_bit(first_bit, bitarr);
-			} else if (!clear && !is_set(first_bit, bitarr)) {
-				flipped = true;
-				set_bit(first_bit, bitarr);
-			}
-
-			first_bit++;
-		}
-	}
-
-	if (flipped)
-		return 1;
-
-	return 0;
-}
-
-/* Turn cpumask into simple, comma-separated cpulist. */
-static char *lxc_cpumask_to_cpulist(__u32 *bitarr, __u32 last_set_bit)
-{
-	__do_free_string_list char **cpulist = NULL;
-	char numstr[INTTYPE_TO_STRLEN(__u32)] = {0};
-	int ret;
-
-	for (__u32 bit = 0; bit <= last_set_bit; bit++) {
-		if (!is_set(bit, bitarr))
-			continue;
-
-		ret = strnprintf(numstr, sizeof(numstr), "%u", bit);
-		if (ret < 0)
-			return NULL;
-
-		ret = lxc_append_string(&cpulist, numstr);
-		if (ret < 0)
-			return ret_set_errno(NULL, ENOMEM);
-	}
-
-	if (!cpulist)
-		return ret_set_errno(NULL, ENOMEM);
-
-	return lxc_string_join(",", (const char **)cpulist, false);
-}
-
 static inline bool is_unified_hierarchy(const struct hierarchy *h)
 {
 	return h->fs_type == UNIFIED_HIERARCHY;
@@ -580,131 +448,8 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
 		SYSWARN("Failed to destroy cgroups");
 }
 
-#define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
-#define __OFFLINE_CPUS "/sys/devices/system/cpu/offline"
-static bool cpuset1_cpus_initialize(int dfd_parent, int dfd_child,
-				    bool am_initialized)
-{
-	__do_free char *cpulist = NULL, *isolcpus = NULL,
-		       *offlinecpus = NULL, *posscpus = NULL;
-	__do_free __u32 *possmask = NULL;
-	int ret;
-	__u32 poss_last_set_bit = 0;
-
-#if !IS_BIONIC
-	posscpus = read_file_at(dfd_parent, "cpuset.cpus", PROTECT_OPEN, 0);
-#else
-	posscpus = read_file_at(dfd_parent, "cpus", PROTECT_OPEN, 0);
-#endif
-	if (!posscpus)
-		return log_error_errno(false, errno, "Failed to read file %d/cpuset.cpus", dfd_parent);
-
-	if (file_exists(__ISOL_CPUS)) {
-		isolcpus = read_file_at(-EBADF, __ISOL_CPUS, PROTECT_OPEN, 0);
-		if (!isolcpus)
-			return log_error_errno(false, errno, "Failed to read file \"%s\"", __ISOL_CPUS);
-
-		if (!isdigit(isolcpus[0]))
-			free_disarm(isolcpus);
-	} else {
-		TRACE("The path \""__ISOL_CPUS"\" to read isolated cpus from does not exist");
-	}
-
-	if (file_exists(__OFFLINE_CPUS)) {
-		offlinecpus = read_file_at(-EBADF, __OFFLINE_CPUS, PROTECT_OPEN, 0);
-		if (!offlinecpus)
-			return log_error_errno(false, errno, "Failed to read file \"%s\"", __OFFLINE_CPUS);
-
-		if (!isdigit(offlinecpus[0]))
-			free_disarm(offlinecpus);
-	} else {
-		TRACE("The path \""__OFFLINE_CPUS"\" to read offline cpus from does not exist");
-	}
-
-	if (!isolcpus && !offlinecpus) {
-		cpulist = move_ptr(posscpus);
-		goto copy_parent;
-	}
-
-	ret = lxc_cpumask(posscpus, &possmask, &poss_last_set_bit);
-	if (ret)
-		return log_error_errno(false, errno, "Failed to create cpumask for possible cpus");
-
-	if (isolcpus)
-		ret = lxc_cpumask_update(isolcpus, possmask, poss_last_set_bit, true);
-
-	if (offlinecpus)
-		ret |= lxc_cpumask_update(offlinecpus, possmask, poss_last_set_bit, true);
-
-	if (!ret) {
-		cpulist = lxc_cpumask_to_cpulist(possmask, poss_last_set_bit);
-		TRACE("No isolated or offline cpus present in cpuset");
-	} else {
-		cpulist = move_ptr(posscpus);
-		TRACE("Removed isolated or offline cpus from cpuset");
-	}
-	if (!cpulist)
-		return log_error_errno(false, errno, "Failed to create cpu list");
-
-copy_parent:
-	if (!am_initialized) {
-#if !IS_BIONIC
-		ret = lxc_writeat(dfd_child, "cpuset.cpus", cpulist, strlen(cpulist));
-#else
-		ret = lxc_writeat(dfd_child, "cpus", cpulist, strlen(cpulist));
-#endif
-		if (ret < 0)
-			return log_error_errno(false, errno, "Failed to write cpu list to \"%d/cpuset.cpus\"", dfd_child);
-
-		TRACE("Copied cpu settings of parent cgroup");
-	}
-
-	return true;
-}
-
-static bool cpuset1_initialize(int dfd_base, int dfd_next)
-{
-	char mems[PATH_MAX];
-	ssize_t bytes;
-	char v;
-
-	/* Determine whether the base cgroup has cpuset inheritance turned on. */
-	bytes = lxc_readat(dfd_base, "cgroup.clone_children", &v, 1);
-	if (bytes < 0)
-		return syserror_ret(false, "Failed to read file %d(cgroup.clone_children)", dfd_base);
-
-	/* Initialize cpuset.cpus removing any isolated and offline cpus. */
-	if (!cpuset1_cpus_initialize(dfd_base, dfd_next, v == '1'))
-		return syserror_ret(false, "Failed to initialize cpuset.cpus");
-
-	/* Read cpuset.mems from parent... */
-#if !IS_BIONIC
-	bytes = lxc_readat(dfd_base, "cpuset.mems", mems, sizeof(mems));
-#else
-	bytes = lxc_readat(dfd_base, "mems", mems, sizeof(mems));
-#endif
-	if (bytes < 0)
-		return syserror_ret(false, "Failed to read file %d(cpuset.mems)", dfd_base);
-
-	/* and copy to first cgroup in the tree... */
-#if !IS_BIONIC
-	bytes = lxc_writeat(dfd_next, "cpuset.mems", mems, bytes);
-#else
-	bytes = lxc_writeat(dfd_next, "mems", mems, bytes);
-#endif
-	if (bytes < 0)
-		return syserror_ret(false, "Failed to write %d(cpuset.mems)", dfd_next);
-
-	/* and finally turn on cpuset inheritance. */
-	bytes = lxc_writeat(dfd_next, "cgroup.clone_children", "1", 1);
-	if (bytes < 0)
-		return syserror_ret(false, "Failed to write %d(cgroup.clone_children)", dfd_next);
-
-	return log_trace(true, "Initialized cpuset in the legacy hierarchy");
-}
-
 static int __cgroup_tree_create(int dfd_base, const char *path, mode_t mode,
-				bool cpuset_v1, bool eexist_ignore)
+				bool eexist_ignore)
 {
 	__do_close int dfd_final = -EBADF;
 	int dfd_cur = dfd_base;
@@ -747,8 +492,7 @@ static int __cgroup_tree_create(int dfd_base, const char *path, mode_t mode,
 					!ret ? " newly created" : "", dfd_base, cur);
 		if (dfd_cur != dfd_base)
 			close(dfd_cur);
-		else if (cpuset_v1 && !cpuset1_initialize(dfd_base, dfd_final))
-			return syserror_set(-EINVAL, "Failed to initialize cpuset controller in the legacy hierarchy");
+
 		/*
 		 * Leave dfd_final pointing to the last fd we opened so
 		 * it will be automatically zapped if we return early.
@@ -771,17 +515,10 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
 			       const char *cgroup_leaf, bool payload)
 {
 	__do_close int fd_limit = -EBADF, fd_final = -EBADF;
-	bool cpuset_v1 = false;
-
-	/*
-	 * The legacy cpuset controller needs massaging in case inheriting
-	 * settings from its immediate ancestor cgroup hasn't been turned on.
-	 */
-	cpuset_v1 = !is_unified_hierarchy(h) && string_in_list(h->controllers, "cpuset");
 
 	if (payload && cgroup_leaf) {
 		/* With isolation both parts need to not already exist. */
-		fd_limit = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, cpuset_v1, false);
+		fd_limit = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, false);
 		if (fd_limit < 0)
 			return syswarn_ret(false, "Failed to create limiting cgroup %d(%s)", h->dfd_base, cgroup_limit_dir);
 
@@ -795,7 +532,7 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
 		 * If we use a separate limit cgroup, the leaf cgroup, i.e. the
 		 * cgroup the container actually resides in, is below fd_limit.
 		 */
-		fd_final = __cgroup_tree_create(h->dfd_lim, cgroup_leaf, 0755, cpuset_v1, false);
+		fd_final = __cgroup_tree_create(h->dfd_lim, cgroup_leaf, 0755, false);
 		if (fd_final < 0) {
 			/* Ensure we don't leave any garbage behind. */
 			if (cgroup_tree_prune(h->dfd_base, cgroup_limit_dir))
@@ -808,7 +545,7 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
 		h->path_con = must_make_path(h->path_lim, cgroup_leaf, NULL);
 
 	} else {
-		fd_final = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, cpuset_v1, false);
+		fd_final = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, false);
 		if (fd_final < 0)
 			return syswarn_ret(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
 
@@ -895,7 +632,6 @@ __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
 		__do_close int fd_pivot = -EBADF;
 		__do_free char *pivot_path = NULL;
 		struct hierarchy *h = ops->hierarchies[i];
-		bool cpuset_v1 = false;
 		int ret;
 
 		/* Monitor might have died before we entered the cgroup. */
@@ -911,9 +647,7 @@ __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
 		else
 			pivot_path = must_make_path(CGROUP_PIVOT, NULL);
 
-		cpuset_v1 = !is_unified_hierarchy(h) && string_in_list(h->controllers, "cpuset");
-
-		fd_pivot = __cgroup_tree_create(h->dfd_base, pivot_path, 0755, cpuset_v1, true);
+		fd_pivot = __cgroup_tree_create(h->dfd_base, pivot_path, 0755, true);
 		if (fd_pivot < 0) {
 			SYSWARN("Failed to create pivot cgroup %d(%s)", h->dfd_base, pivot_path);
 			continue;

From 14620221cf9a421023b60e11d05ea902bc6ced2a Mon Sep 17 00:00:00 2001
From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
Date: Tue, 21 Apr 2026 16:42:14 +0200
Subject: [PATCH 04/29] lxc/cgroups: drop cgroup1 mounting logic

Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
---
 src/lxc/cgroups/cgfsng.c | 186 +--------------------------------------
 1 file changed, 4 insertions(+), 182 deletions(-)

diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index cf27b86c57..6e37a6a068 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -1813,75 +1813,6 @@ __cgfsng_ops static void cgfsng_finalize(struct cgroup_ops *ops)
         }
 }
 
-/* cgroup-full:* is done, no need to create subdirs */
-static inline bool cg_mount_needs_subdirs(int cgroup_automount_type)
-{
-	switch (cgroup_automount_type) {
-	case LXC_AUTO_CGROUP_RO:
-		return true;
-	case LXC_AUTO_CGROUP_RW:
-		return true;
-	case LXC_AUTO_CGROUP_MIXED:
-		return true;
-	}
-
-	return false;
-}
-
-/* After $rootfs/sys/fs/container/controller/the/cg/path has been created,
- * remount controller ro if needed and bindmount the cgroupfs onto
- * control/the/cg/path.
- */
-static int cg_legacy_mount_controllers(int cgroup_automount_type, struct hierarchy *h,
-				       char *hierarchy_mnt, char *cgpath,
-				       const char *container_cgroup)
-{
-	__do_free char *sourcepath = NULL;
-	int ret, remount_flags;
-	int flags = MS_BIND;
-
-	if ((cgroup_automount_type == LXC_AUTO_CGROUP_RO) ||
-	    (cgroup_automount_type == LXC_AUTO_CGROUP_MIXED)) {
-		ret = mount(hierarchy_mnt, hierarchy_mnt, "cgroup", MS_BIND, NULL);
-		if (ret < 0)
-			return log_error_errno(-1, errno, "Failed to bind mount \"%s\" onto \"%s\"",
-					       hierarchy_mnt, hierarchy_mnt);
-
-		remount_flags = add_required_remount_flags(hierarchy_mnt,
-							   hierarchy_mnt,
-							   flags | MS_REMOUNT);
-		ret = mount(hierarchy_mnt, hierarchy_mnt, "cgroup",
-			    remount_flags | MS_REMOUNT | MS_BIND | MS_RDONLY,
-			    NULL);
-		if (ret < 0)
-			return log_error_errno(-1, errno, "Failed to remount \"%s\" ro", hierarchy_mnt);
-
-		INFO("Remounted %s read-only", hierarchy_mnt);
-	}
-
-	sourcepath = make_cgroup_path(h, h->at_base, container_cgroup, NULL);
-	if (cgroup_automount_type == LXC_AUTO_CGROUP_RO)
-		flags |= MS_RDONLY;
-
-	ret = mount(sourcepath, cgpath, "cgroup", flags, NULL);
-	if (ret < 0)
-		return log_error_errno(-1, errno, "Failed to mount \"%s\" onto \"%s\"",
-				       h->controllers[0], cgpath);
-	INFO("Mounted \"%s\" onto \"%s\"", h->controllers[0], cgpath);
-
-	if (flags & MS_RDONLY) {
-		remount_flags = add_required_remount_flags(sourcepath, cgpath,
-							   flags | MS_REMOUNT);
-		ret = mount(sourcepath, cgpath, "cgroup", remount_flags, NULL);
-		if (ret < 0)
-			return log_error_errno(-1, errno, "Failed to remount \"%s\" ro", cgpath);
-		INFO("Remounted %s read-only", cgpath);
-	}
-
-	INFO("Completed second stage cgroup automounts for \"%s\"", cgpath);
-	return 0;
-}
-
 /* __cgroupfs_mount
  *
  * Mount cgroup hierarchies directly without using bind-mounts. The main
@@ -1894,7 +1825,7 @@ static int __cgroupfs_mount(int cgroup_automount_type, struct hierarchy *h,
 {
 	__do_close int fd_fs = -EBADF;
 	unsigned int flags = 0;
-	char *fstype;
+	char *fstype = "cgroup2";
 	int ret;
 
 	if (dfd_mnt_cgroupfs < 0)
@@ -1910,10 +1841,8 @@ static int __cgroupfs_mount(int cgroup_automount_type, struct hierarchy *h,
 	    (cgroup_automount_type == LXC_AUTO_CGROUP2_RO))
 		flags |= MOUNT_ATTR_RDONLY;
 
-	if (is_unified_hierarchy(h))
-		fstype = "cgroup2";
-	else
-		fstype = "cgroup";
+	if (!is_unified_hierarchy(h))
+		return ret_errno(EOPNOTSUPP);
 
 	if (can_use_mount_api()) {
 		fd_fs = fs_prepare(fstype, -EBADF, "", 0, 0);
@@ -1970,26 +1899,6 @@ static inline int cgroupfs_mount(int cgroup_automount_type, struct hierarchy *h,
 				dfd_mnt_cgroupfs, hierarchy_mnt);
 }
 
-static inline int cgroupfs_bind_mount(int cgroup_automount_type, struct hierarchy *h,
-				      struct lxc_rootfs *rootfs,
-				      int dfd_mnt_cgroupfs,
-				      const char *hierarchy_mnt)
-{
-	switch (cgroup_automount_type) {
-	case LXC_AUTO_CGROUP_FULL_RO:
-		break;
-	case LXC_AUTO_CGROUP_FULL_RW:
-		break;
-	case LXC_AUTO_CGROUP_FULL_MIXED:
-		break;
-	default:
-		return 0;
-	}
-
-	return __cgroupfs_mount(cgroup_automount_type, h, rootfs,
-				dfd_mnt_cgroupfs, hierarchy_mnt);
-}
-
 __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
 				      struct lxc_handler *handler, int cg_flags)
 {
@@ -1999,7 +1908,6 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
 	bool in_cgroup_ns = false, wants_force_mount = false;
 	struct lxc_conf *conf = handler->conf;
 	struct lxc_rootfs *rootfs = &conf->rootfs;
-	const char *rootfs_mnt = get_rootfs_mnt(rootfs);
 	int ret;
 
 	if (!ops)
@@ -2143,93 +2051,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
 		return syserror_ret(false, "Failed to mount cgroups");
 	}
 
-	/*
-	 * Mount a tmpfs over DEFAULT_CGROUP_MOUNTPOINT. Note that we're
-	 * relying on RESOLVE_BENEATH so we need to skip the leading "/" in the
-	 * DEFAULT_CGROUP_MOUNTPOINT define.
-	 */
-	if (can_use_mount_api()) {
-		fd_fs = fs_prepare("tmpfs", -EBADF, "", 0, 0);
-		if (fd_fs < 0)
-			return log_error_errno(false, errno, "Failed to create new filesystem context for tmpfs");
-
-		ret = fs_set_property(fd_fs, "mode", "0755");
-		if (ret < 0)
-			return log_error_errno(false, errno, "Failed to mount tmpfs onto %d(dev)", fd_fs);
-
-		ret = fs_set_property(fd_fs, "size", "10240k");
-		if (ret < 0)
-			return log_error_errno(false, errno, "Failed to mount tmpfs onto %d(dev)", fd_fs);
-
-		ret = fs_attach(fd_fs, rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
-				PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV,
-				MOUNT_ATTR_NOSUID | MOUNT_ATTR_NODEV |
-				MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME);
-	} else {
-		cgroup_root = must_make_path(rootfs_mnt, DEFAULT_CGROUP_MOUNTPOINT, NULL);
-		ret = safe_mount(NULL, cgroup_root, "tmpfs",
-				 MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
-				 "size=10240k,mode=755", rootfs_mnt);
-	}
-	if (ret < 0)
-		return log_error_errno(false, errno, "Failed to mount tmpfs on %s",
-				       DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
-
-	dfd_mnt_tmpfs = open_at(rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
-				PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, 0);
-	if (dfd_mnt_tmpfs < 0)
-		return syserror_ret(false, "Failed to open %d(%s)",
-				    rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
-
-	for (int i = 0; ops->hierarchies[i]; i++) {
-		__do_free char *hierarchy_mnt = NULL, *path2 = NULL;
-		struct hierarchy *h = ops->hierarchies[i];
-
-		ret = mkdirat(dfd_mnt_tmpfs, h->at_mnt, 0000);
-		if (ret < 0)
-			return syserror_ret(false, "Failed to create cgroup at_mnt %d(%s)", dfd_mnt_tmpfs, h->at_mnt);
-
-		if (in_cgroup_ns && wants_force_mount) {
-			/*
-			 * If cgroup namespaces are supported but the container
-			 * will not have CAP_SYS_ADMIN after it has started we
-			 * need to mount the cgroups manually.
-			 */
-			ret = cgroupfs_mount(cgroup_automount_type, h, rootfs,
-					     dfd_mnt_tmpfs, h->at_mnt);
-			if (ret < 0)
-				return false;
-
-			continue;
-		}
-
-		/* Here is where the ancient kernel section begins. */
-		ret = cgroupfs_bind_mount(cgroup_automount_type, h, rootfs,
-					  dfd_mnt_tmpfs, h->at_mnt);
-		if (ret < 0)
-			return false;
-
-		if (!cg_mount_needs_subdirs(cgroup_automount_type))
-			continue;
-
-		if (!cgroup_root)
-			cgroup_root = must_make_path(rootfs_mnt, DEFAULT_CGROUP_MOUNTPOINT, NULL);
-
-		hierarchy_mnt = must_make_path(cgroup_root, h->at_mnt, NULL);
-		path2 = must_make_path(hierarchy_mnt, h->at_base,
-				       ops->container_cgroup, NULL);
-		ret = lxc_mkdir_p(path2, 0755);
-		if (ret < 0 && (errno != EEXIST))
-			return false;
-
-		ret = cg_legacy_mount_controllers(cgroup_automount_type, h,
-						  hierarchy_mnt, path2,
-						  ops->container_cgroup);
-		if (ret < 0)
-			return false;
-	}
-
-	return true;
+	return log_error_errno(false, EOPNOTSUPP, "Failed to mount cgroups - unsupported cgroup layout");
 }
 
 /* Only root needs to escape to the cgroup of its init. */

From 358a66022f4e15e544f30ac7303fdeb5225cd880 Mon Sep 17 00:00:00 2001
From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
Date: Tue, 21 Apr 2026 17:09:07 +0200
Subject: [PATCH 05/29] lxc/conf: drop cgroup1 config options (lxc.cgroup.*)

Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
---
 src/lxc/conf.c    |  7 -------
 src/lxc/conf.h    |  1 -
 src/lxc/confile.c | 35 +----------------------------------
 3 files changed, 1 insertion(+), 42 deletions(-)

diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 571b3fd203..947cbad66f 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -3208,7 +3208,6 @@ struct lxc_conf *lxc_conf_init(void)
 	new->rootfs.fd_path_pin = -EBADF;
 	new->rootfs.dfd_idmapped = -EBADF;
 	new->logfd = -1;
-	INIT_LIST_HEAD(&new->cgroup);
 	INIT_LIST_HEAD(&new->cgroup2);
 	/* Block ("allowlist") all devices by default. */
 	new->bpf_devices.list_type = LXC_BPF_DEVICE_CGROUP_ALLOWLIST;
@@ -4118,11 +4117,6 @@ int lxc_clear_cgroups(struct lxc_conf *c, const char *key, int version)
 		namespaced_token	= "lxc.cgroup2.";
 		namespaced_token_len	= STRLITERALLEN("lxc.cgroup2.");
 		list = &c->cgroup2;
-	} else if (version == CGROUP_SUPER_MAGIC) {
-		global_token		= "lxc.cgroup";
-		namespaced_token	= "lxc.cgroup.";
-		namespaced_token_len	= STRLITERALLEN("lxc.cgroup.");
-		list = &c->cgroup;
 	} else {
 		return ret_errno(EINVAL);
 	}
@@ -4370,7 +4364,6 @@ void lxc_conf_free(struct lxc_conf *conf)
 	free(conf->lsm_se_keyring_context);
 	lxc_seccomp_free(&conf->seccomp);
 	lxc_clear_config_caps(conf);
-	lxc_clear_cgroups(conf, "lxc.cgroup", CGROUP_SUPER_MAGIC);
 	lxc_clear_cgroups(conf, "lxc.cgroup2", CGROUP2_SUPER_MAGIC);
 	lxc_clear_cgroups_devices(conf);
 	lxc_clear_hooks(conf, "lxc.hook");
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index 762d58901d..ea4e199404 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -417,7 +417,6 @@ struct lxc_conf {
 	struct utsname *utsname;
 
 	struct {
-		struct list_head cgroup;
 		struct list_head cgroup2;
 		struct bpf_devices bpf_devices;
 	};
diff --git a/src/lxc/confile.c b/src/lxc/confile.c
index 8985f15b79..3d310a9b70 100644
--- a/src/lxc/confile.c
+++ b/src/lxc/confile.c
@@ -65,7 +65,6 @@ lxc_config_define(apparmor_profile);
 lxc_config_define(apparmor_raw);
 lxc_config_define(cap_drop);
 lxc_config_define(cap_keep);
-lxc_config_define(cgroup_controller);
 lxc_config_define(cgroup2_controller);
 lxc_config_define(cgroup_dir);
 lxc_config_define(cgroup_monitor_dir);
@@ -206,7 +205,6 @@ static struct lxc_config_t config_jump_table[] = {
 	{ "lxc.cgroup.dir.container",       true,  set_config_cgroup_container_dir,       get_config_cgroup_container_dir,       clr_config_cgroup_container_dir,       },
 	{ "lxc.cgroup.dir",                 true,  set_config_cgroup_dir,                 get_config_cgroup_dir,                 clr_config_cgroup_dir,                 },
 	{ "lxc.cgroup.relative",            true,  set_config_cgroup_relative,            get_config_cgroup_relative,            clr_config_cgroup_relative,            },
-	{ "lxc.cgroup",                     false, set_config_cgroup_controller,          get_config_cgroup_controller,          clr_config_cgroup_controller,          },
 	{ "lxc.console.buffer.size",        true,  set_config_console_buffer_size,        get_config_console_buffer_size,        clr_config_console_buffer_size,        },
 	{ "lxc.console.logfile",            true,  set_config_console_logfile,            get_config_console_logfile,            clr_config_console_logfile,            },
 	{ "lxc.console.path",               true,  set_config_console_path,               get_config_console_path,               clr_config_console_path,               },
@@ -1934,9 +1932,6 @@ static int __set_config_cgroup_controller(const char *key, const char *value,
 	if (version == CGROUP2_SUPER_MAGIC) {
 		token = "lxc.cgroup2.";
 		token_len = 12;
-	} else if (version == CGROUP_SUPER_MAGIC) {
-		token = "lxc.cgroup.";
-		token_len = 11;
 	} else {
 		return ret_errno(EINVAL);
 	}
@@ -1962,22 +1957,12 @@ static int __set_config_cgroup_controller(const char *key, const char *value,
 
 	new_cgroup->version = version;
 
-	if (version == CGROUP2_SUPER_MAGIC)
-		list_add_tail(&new_cgroup->head, &lxc_conf->cgroup2);
-	else
-		list_add_tail(&new_cgroup->head, &lxc_conf->cgroup);
+	list_add_tail(&new_cgroup->head, &lxc_conf->cgroup2);
 	move_ptr(new_cgroup);
 
 	return 0;
 }
 
-static int set_config_cgroup_controller(const char *key, const char *value,
-					struct lxc_conf *lxc_conf, void *data)
-{
-	return __set_config_cgroup_controller(key, value, lxc_conf,
-					      CGROUP_SUPER_MAGIC);
-}
-
 static int set_config_cgroup2_controller(const char *key, const char *value,
 					 struct lxc_conf *lxc_conf, void *data)
 {
@@ -3903,11 +3888,6 @@ static int __get_config_cgroup_controller(const char *key, char *retv,
 		namespaced_token = "lxc.cgroup2.";
 		namespaced_token_len = STRLITERALLEN("lxc.cgroup2.");
 		list = &c->cgroup2;
-	} else if (version == CGROUP_SUPER_MAGIC) {
-		global_token = "lxc.cgroup";
-		namespaced_token = "lxc.cgroup.";
-		namespaced_token_len = STRLITERALLEN("lxc.cgroup.");
-		list = &c->cgroup;
 	} else {
 		return ret_errno(EINVAL);
 	}
@@ -3934,13 +3914,6 @@ static int __get_config_cgroup_controller(const char *key, char *retv,
 	return fulllen;
 }
 
-static int get_config_cgroup_controller(const char *key, char *retv, int inlen,
-					struct lxc_conf *c, void *data)
-{
-	return __get_config_cgroup_controller(key, retv, inlen, c,
-					      CGROUP_SUPER_MAGIC);
-}
-
 static int get_config_cgroup2_controller(const char *key, char *retv, int inlen,
 					 struct lxc_conf *c, void *data)
 {
@@ -4931,12 +4904,6 @@ static inline int clr_config_keyring_session(const char *key,
 	return 0;
 }
 
-static inline int clr_config_cgroup_controller(const char *key,
-					       struct lxc_conf *c, void *data)
-{
-	return lxc_clear_cgroups(c, key, CGROUP_SUPER_MAGIC);
-}
-
 static inline int clr_config_cgroup2_controller(const char *key,
 						struct lxc_conf *c, void *data)
 {

From 2d51b77b1db6e23c43cedb958650a3b9907f6b93 Mon Sep 17 00:00:00 2001
From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
Date: Tue, 21 Apr 2026 17:23:42 +0200
Subject: [PATCH 06/29] tests: use lxc.cgroup2 instead of lxc.cgroup

Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
---
 src/tests/get_item.c                  | 16 ++++++++--------
 src/tests/lxc-test-checkpoint-restore |  2 +-
 src/tests/parse_config_file.c         | 12 ++++++------
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/tests/get_item.c b/src/tests/get_item.c
index 40cc564adf..52559c1b2b 100644
--- a/src/tests/get_item.c
+++ b/src/tests/get_item.c
@@ -622,22 +622,22 @@ int main(int argc, char *argv[])
 		goto out;
 	}
 
-	ret = c->get_config_item(c, "lxc.cgroup", v3, 2047);
+	ret = c->get_config_item(c, "lxc.cgroup2", v3, 2047);
 	if (ret < 0) {
-		fprintf(stderr, "%d: get_config_item(cgroup.devices) returned %d\n", __LINE__, ret);
+		fprintf(stderr, "%d: get_config_item(cgroup2.devices) returned %d\n", __LINE__, ret);
 		goto out;
 	}
-	printf("%d: get_config_item (cgroup.devices) returned %d %s\n", __LINE__, ret, v3);
+	printf("%d: get_config_item (cgroup2.devices) returned %d %s\n", __LINE__, ret, v3);
 
-	ret = c->get_config_item(c, "lxc.cgroup.devices.allow", v3, 2047);
+	ret = c->get_config_item(c, "lxc.cgroup2.devices.allow", v3, 2047);
 	if (ret < 0) {
-		fprintf(stderr, "%d: get_config_item(cgroup.devices.devices.allow) returned %d\n", __LINE__, ret);
+		fprintf(stderr, "%d: get_config_item(cgroup2.devices.devices.allow) returned %d\n", __LINE__, ret);
 		goto out;
 	}
-	printf("%d: get_config_item (cgroup.devices.devices.allow) returned %d %s\n", __LINE__, ret, v3);
+	printf("%d: get_config_item (cgroup2.devices.devices.allow) returned %d %s\n", __LINE__, ret, v3);
 
-	if (!c->clear_config_item(c, "lxc.cgroup")) {
-		fprintf(stderr, "%d: failed clearing lxc.cgroup\n", __LINE__);
+	if (!c->clear_config_item(c, "lxc.cgroup2")) {
+		fprintf(stderr, "%d: failed clearing lxc.cgroup2\n", __LINE__);
 		goto out;
 	}
 
diff --git a/src/tests/lxc-test-checkpoint-restore b/src/tests/lxc-test-checkpoint-restore
index 21f9b7c652..3a2853d644 100755
--- a/src/tests/lxc-test-checkpoint-restore
+++ b/src/tests/lxc-test-checkpoint-restore
@@ -42,7 +42,7 @@ cat >> "$(lxc-config lxc.lxcpath)/$name/config" <<EOF
 # hax for criu
 lxc.console.path = none
 lxc.tty.max = 0
-lxc.cgroup.devices.deny = c 5:1 rwm
+lxc.cgroup2.devices.deny = c 5:1 rwm
 EOF
 
 lxc-start -n $name -d || FAIL "starting container"
diff --git a/src/tests/parse_config_file.c b/src/tests/parse_config_file.c
index 6bdac3609b..fdc35ae3e8 100644
--- a/src/tests/parse_config_file.c
+++ b/src/tests/parse_config_file.c
@@ -391,19 +391,19 @@ int main(int argc, char *argv[])
 		goto non_test_error;
 	}
 
-	if (set_get_compare_clear_save_load(c, "lxc.cgroup.cpuset.cpus",
+	if (set_get_compare_clear_save_load(c, "lxc.cgroup2.cpuset.cpus",
 					    "1-100", tmpf, false) < 0) {
-		lxc_error("%s\n", "lxc.cgroup.cpuset.cpus");
+		lxc_error("%s\n", "lxc.cgroup2.cpuset.cpus");
 		goto non_test_error;
 	}
 
-	if (!c->set_config_item(c, "lxc.cgroup.cpuset.cpus", "1-100")) {
-		lxc_error("%s\n", "failed to set config item \"lxc.cgroup.cpuset.cpus\" to \"1-100\"");
+	if (!c->set_config_item(c, "lxc.cgroup2.cpuset.cpus", "1-100")) {
+		lxc_error("%s\n", "failed to set config item \"lxc.cgroup2.cpuset.cpus\" to \"1-100\"");
 		return -1;
 	}
 
-	if (!c->set_config_item(c, "lxc.cgroup.memory.limit_in_bytes", "123456789")) {
-		lxc_error("%s\n", "failed to set config item \"lxc.cgroup.memory.limit_in_bytes\" to \"123456789\"");
+	if (!c->set_config_item(c, "lxc.cgroup2.memory.max", "123456789")) {
+		lxc_error("%s\n", "failed to set config item \"lxc.cgroup2.memory.max\" to \"123456789\"");
 		return -1;
 	}
 

From 5ead0bb5d9b67bcc7cc4235474837f4e0dcd28e8 Mon Sep 17 00:00:00 2001
From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
Date: Tue, 21 Apr 2026 17:37:17 +0200
Subject: [PATCH 07/29] config/templates: don't use cgroup1 settings

Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
---
 config/templates/common.conf.in | 29 -----------------------------
 config/templates/userns.conf.in |  5 -----
 2 files changed, 34 deletions(-)

diff --git a/config/templates/common.conf.in b/config/templates/common.conf.in
index 7fb109e049..311fbd44e8 100644
--- a/config/templates/common.conf.in
+++ b/config/templates/common.conf.in
@@ -15,35 +15,6 @@ lxc.cap.drop = mac_admin mac_override sys_time sys_module sys_rawio
 # Ensure hostname is changed on clone
 lxc.hook.clone = @LXCHOOKDIR@/clonehostname
 
-# Default legacy cgroup configuration
-#
-# CGroup allowlist
-lxc.cgroup.devices.deny = a
-## Allow any mknod (but not reading/writing the node)
-lxc.cgroup.devices.allow = c *:* m
-lxc.cgroup.devices.allow = b *:* m
-## Allow specific devices
-### /dev/null
-lxc.cgroup.devices.allow = c 1:3 rwm
-### /dev/zero
-lxc.cgroup.devices.allow = c 1:5 rwm
-### /dev/full
-lxc.cgroup.devices.allow = c 1:7 rwm
-### /dev/tty
-lxc.cgroup.devices.allow = c 5:0 rwm
-### /dev/console
-lxc.cgroup.devices.allow = c 5:1 rwm
-### /dev/ptmx
-lxc.cgroup.devices.allow = c 5:2 rwm
-### /dev/random
-lxc.cgroup.devices.allow = c 1:8 rwm
-### /dev/urandom
-lxc.cgroup.devices.allow = c 1:9 rwm
-### /dev/pts/*
-lxc.cgroup.devices.allow = c 136:* rwm
-### fuse
-lxc.cgroup.devices.allow = c 10:229 rwm
-
 # Default unified cgroup configuration
 #
 # CGroup allowlist
diff --git a/config/templates/userns.conf.in b/config/templates/userns.conf.in
index 255dd01a35..b45f601fbf 100644
--- a/config/templates/userns.conf.in
+++ b/config/templates/userns.conf.in
@@ -1,10 +1,5 @@
 # CAP_SYS_ADMIN in init-user-ns is required for cgroup.devices
 #
-# Default legacy cgroup configuration
-#
-lxc.cgroup.devices.deny =
-lxc.cgroup.devices.allow =
-
 # Default unified cgroup configuration
 #
 lxc.cgroup2.devices.deny =

From 45c1dea3bba6411abdafc26b7bfa7f8d10815aa0 Mon Sep 17 00:00:00 2001
From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
Date: Tue, 21 Apr 2026 17:39:10 +0200
Subject: [PATCH 08/29] lxc/cgroups: warn if non-unified cgroup layout detected

Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
---
 src/lxc/cgroups/cgroup.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/lxc/cgroups/cgroup.c b/src/lxc/cgroups/cgroup.c
index 5e2a7d0993..b8029dade5 100644
--- a/src/lxc/cgroups/cgroup.c
+++ b/src/lxc/cgroups/cgroup.c
@@ -40,14 +40,11 @@ struct cgroup_ops *cgroup_init(struct lxc_conf *conf)
 
 	TRACE("Initialized cgroup driver %s", cgroup_ops->driver);
 
-	if (cgroup_ops->cgroup_layout == CGROUP_LAYOUT_LEGACY)
-		TRACE("Legacy cgroup layout");
-	else if (cgroup_ops->cgroup_layout == CGROUP_LAYOUT_HYBRID)
-		TRACE("Hybrid cgroup layout");
-	else if (cgroup_ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED)
+	if (pure_unified_layout(cgroup_ops)) {
 		TRACE("Unified cgroup layout");
-	else
-		WARN("Unsupported cgroup layout");
+	} else {
+		WARN("Unsupported cgroup layout (%s)", cgroup_layout_name(cgroup_ops->cgroup_layout));
+	}
 
 	return cgroup_ops;
 }

From 89b4c188ad32409e4de06e85375e2d392fcc9334 Mon Sep 17 00:00:00 2001
From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
Date: Tue, 21 Apr 2026 17:43:31 +0200
Subject: [PATCH 09/29] doc: mention that legacy/hybrid hierarchy support is
 dropped

Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
---
 doc/lxc.container.conf.sgml.in | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in
index 39efffbe56..ae58e5e3ba 100644
--- a/doc/lxc.container.conf.sgml.in
+++ b/doc/lxc.container.conf.sgml.in
@@ -1558,7 +1558,8 @@
 	ignore <option>lxc.cgroup.</option> settings on systems that only use
 	the unified hierarchy. Conversely, it will ignore
 	<option>lxc.cgroup2.</option> options on systems that only use legacy
-	hierarchies.
+	hierarchies. <option>lxc.cgroup.</option> (legacy and hybrid hierarchy)
+	support is dropped.
       </para>
 
       <para>

From b0b65bac0a043c2c86589ad4b0b67b141ff61e08 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber@stgraber.org>
Date: Wed, 22 Apr 2026 14:05:20 -0400
Subject: [PATCH 10/29] Merge pull request #4671 from mihalicyn/remove_cgroup1

remove cgroup1 support

From 555c80b49ca2c2f2987f2c9d13a3564d963805e6 Mon Sep 17 00:00:00 2001
From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
Date: Wed, 22 Apr 2026 09:09:39 +0200
Subject: [PATCH 11/29] lxc/start: assume CLONE_PIDFD and clone3 are supported

We agreed to set Linux 6.12 as the minimum kernel requirement for the
LXC 7.x line; it was released in Nov 2024 [1].

Let's drop the fallback code for kernels where CLONE_PIDFD or clone3 is not
supported. CLONE_PIDFD was added in 5.2 and clone3 was added in 5.3.

I decided to keep the fallback logic for kernels without CLONE_INTO_CGROUP
support for now, even though it was added in 5.7.

Link: https://github.com/torvalds/linux/commit/adc218676eef25575469234709c2d87185ca223a [1]
Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
---
 src/lxc/start.c | 42 ++++++++++++++----------------------------
 1 file changed, 14 insertions(+), 28 deletions(-)

diff --git a/src/lxc/start.c b/src/lxc/start.c
index 7f269805e0..ddec8f7624 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -1774,8 +1774,17 @@ static inline int do_share_ns(void *arg)
 	flags |= CLONE_PARENT;
 	handler->pid = lxc_raw_clone_cb(do_start, handler, CLONE_PIDFD | flags,
 					&handler->pidfd);
-	if (handler->pid < 0)
+	if (handler->pid < 0) {
+		ERROR("Failed to clone process");
 		return -1;
+	}
+
+	if (handler->pidfd < 0) {
+		kill(handler->pid, SIGKILL);
+		handler->pid = -1;
+		ERROR("CLONE_PIDFD isn't supported");
+		return -1;
+	}
 
 	return 0;
 }
@@ -1920,7 +1929,7 @@ static int lxc_spawn(struct lxc_handler *handler)
 		/* Try to spawn directly into target cgroup. */
 		handler->pid = lxc_clone3(&clone_args, CLONE_ARGS_SIZE_VER2);
 		if (handler->pid < 0) {
-			SYSTRACE("Failed to spawn container directly into target cgroup");
+			SYSWARN("Failed to spawn container directly into target cgroup");
 
 			/* Kernel might simply be too old for CLONE_INTO_CGROUP. */
 			resolve_cgroup_clone_flags(handler);
@@ -1931,31 +1940,6 @@ static int lxc_spawn(struct lxc_handler *handler)
 			TRACE("Spawned container directly into target cgroup via cgroup2 fd %d", cgroup_fd);
 		}
 
-		/* Kernel might be too old for clone3(). */
-		if (handler->pid < 0) {
-			SYSTRACE("Failed to spawn container via clone3()");
-
-		/*
-		 * In contrast to all other architectures arm64 verifies that
-		 * the argument we use to retrieve the pidfd with is
-		 * initialized to 0. But we need to be able to initialize it to
-		 * a negative value such as our customary -EBADF so we can
-		 * detect whether this kernel supports pidfds. If the syscall
-		 * returns and the pidfd variable is set to something >= 0 then
-		 * we know this is a kernel supporting pidfds. But if we can't
-		 * set it to -EBADF then this won't work since 0 is a valid
-		 * file descriptor too. And since legacy clone silently ignores
-		 * unknown flags we are left without any way to detect support
-		 * for pidfds. So let's special-case arm64 to not fail starting
-		 * containers.
-		 */
-		#if defined(__aarch64__)
-			handler->pid = lxc_raw_legacy_clone(handler->clone_flags & ~CLONE_PIDFD, NULL);
-		#else
-			handler->pid = lxc_raw_legacy_clone(handler->clone_flags, &handler->pidfd);
-		#endif
-		}
-
 		if (handler->pid < 0) {
 			SYSERROR(LXC_CLONE_ERROR);
 			goto out_delete_net;
@@ -1975,8 +1959,10 @@ static int lxc_spawn(struct lxc_handler *handler)
 		goto out_delete_net;
 
 	/* Verify that we can actually make use of pidfds. */
-	if (!lxc_can_use_pidfd(handler->pidfd))
+	if (!lxc_can_use_pidfd(handler->pidfd)) {
 		close_prot_errno_disarm(handler->pidfd);
+		goto out_delete_net;
+	}
 
 	ret = strnprintf(pidstr, 20, "%d", handler->pid);
 	if (ret < 0)

From d8e9d4da4aabfacc56598db7f97ae301d4603071 Mon Sep 17 00:00:00 2001
From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
Date: Wed, 22 Apr 2026 09:33:18 +0200
Subject: [PATCH 12/29] lxc: assume fsopen/open_tree/mount_setattr syscalls are
 supported

fsopen and open_tree were added in 5.2; mount_setattr was added in 5.12.

Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
---
 src/lxc/cgroups/cgfsng.c |  52 +++-----
 src/lxc/conf.c           | 277 +++++++++++++--------------------------
 2 files changed, 104 insertions(+), 225 deletions(-)

diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index 6e37a6a068..17f499b98f 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -1844,44 +1844,24 @@ static int __cgroupfs_mount(int cgroup_automount_type, struct hierarchy *h,
 	if (!is_unified_hierarchy(h))
 		return ret_errno(EOPNOTSUPP);
 
-	if (can_use_mount_api()) {
-		fd_fs = fs_prepare(fstype, -EBADF, "", 0, 0);
-		if (fd_fs < 0)
-			return log_error_errno(-errno, errno, "Failed to prepare filesystem context for %s", fstype);
-
-		if (!is_unified_hierarchy(h)) {
-			for (const char **it = (const char **)h->controllers; it && *it; it++) {
-				if (strnequal(*it, "name=", STRLITERALLEN("name=")))
-					ret = fs_set_property(fd_fs, "name", *it + STRLITERALLEN("name="));
-				else
-					ret = fs_set_property(fd_fs, *it, "");
-				if (ret < 0)
-					return log_error_errno(-errno, errno, "Failed to add %s controller to cgroup filesystem context %d(dev)", *it, fd_fs);
-			}
-		}
-
-		ret = fs_attach(fd_fs, dfd_mnt_cgroupfs, hierarchy_mnt,
-				PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH,
-				flags);
-	} else {
-		__do_free char *controllers = NULL, *target = NULL;
-		unsigned int old_flags = 0;
-		const char *rootfs_mnt;
-
-		if (!is_unified_hierarchy(h)) {
-			controllers = lxc_string_join(",", (const char **)h->controllers, false);
-			if (!controllers)
-				return ret_errno(ENOMEM);
+	fd_fs = fs_prepare(fstype, -EBADF, "", 0, 0);
+	if (fd_fs < 0)
+		return log_error_errno(-errno, errno, "Failed to prepare filesystem context for %s", fstype);
+
+	if (!is_unified_hierarchy(h)) {
+		for (const char **it = (const char **)h->controllers; it && *it; it++) {
+			if (strnequal(*it, "name=", STRLITERALLEN("name=")))
+				ret = fs_set_property(fd_fs, "name", *it + STRLITERALLEN("name="));
+			else
+				ret = fs_set_property(fd_fs, *it, "");
+			if (ret < 0)
+				return log_error_errno(-errno, errno, "Failed to add %s controller to cgroup filesystem context %d(dev)", *it, fd_fs);
 		}
-
-		rootfs_mnt = get_rootfs_mnt(rootfs);
-		ret = mnt_attributes_old(flags, &old_flags);
-		if (ret)
-			return log_error_errno(-EINVAL, EINVAL, "Unsupported mount properties specified");
-
-		target = must_make_path(rootfs_mnt, DEFAULT_CGROUP_MOUNTPOINT, hierarchy_mnt, NULL);
-		ret = safe_mount(NULL, target, fstype, old_flags, controllers, rootfs_mnt);
 	}
+
+	ret = fs_attach(fd_fs, dfd_mnt_cgroupfs, hierarchy_mnt,
+			PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH,
+			flags);
 	if (ret < 0)
 		return log_error_errno(ret, errno, "Failed to mount %s filesystem onto %d(%s)",
 				       fstype, dfd_mnt_cgroupfs, maybe_empty(hierarchy_mnt));
diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 947cbad66f..4d366d04b5 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -808,19 +808,16 @@ static int lxc_setup_ttys(struct lxc_conf *conf)
 						       "Failed to unlink %d(%s)",
 						       rootfs->dfd_dev, tty_name);
 
-			if (can_use_mount_api())
-				ret = fd_bind_mount(tty->pty, "",
-						    PROTECT_OPATH_FILE,
-						    PROTECT_LOOKUP_BENEATH_XDEV,
-						    fd_to, "",
-						    PROTECT_OPATH_FILE,
-						    PROTECT_LOOKUP_BENEATH_XDEV,
-						    0,
-						    0,
-						    0,
-						    false);
-			else
-				ret = mount_fd(tty->pty, fd_to, "none", MS_BIND, 0);
+			ret = fd_bind_mount(tty->pty, "",
+					    PROTECT_OPATH_FILE,
+					    PROTECT_LOOKUP_BENEATH_XDEV,
+					    fd_to, "",
+					    PROTECT_OPATH_FILE,
+					    PROTECT_LOOKUP_BENEATH_XDEV,
+					    0,
+					    0,
+					    0,
+					    false);
 			if (ret < 0)
 				return log_error_errno(-errno, errno,
 						       "Failed to bind mount \"%s\" onto \"%s\"",
@@ -845,19 +842,16 @@ static int lxc_setup_ttys(struct lxc_conf *conf)
 						       "Failed to create tty mount target %d(%s)",
 						       rootfs->dfd_dev, rootfs->buf);
 
-			if (can_use_mount_api())
-				ret = fd_bind_mount(tty->pty, "",
-						    PROTECT_OPATH_FILE,
-						    PROTECT_LOOKUP_BENEATH_XDEV,
-						    fd_to, "",
-						    PROTECT_OPATH_FILE,
-						    PROTECT_LOOKUP_BENEATH,
-						    0,
-						    0,
-						    0,
-						    false);
-			else
-				ret = mount_fd(tty->pty, fd_to, "none", MS_BIND, 0);
+			ret = fd_bind_mount(tty->pty, "",
+					    PROTECT_OPATH_FILE,
+					    PROTECT_LOOKUP_BENEATH_XDEV,
+					    fd_to, "",
+					    PROTECT_OPATH_FILE,
+					    PROTECT_LOOKUP_BENEATH,
+					    0,
+					    0,
+					    0,
+					    false);
 			if (ret < 0)
 				return log_error_errno(-errno, errno,
 						       "Failed to bind mount \"%s\" onto \"%s\"",
@@ -1017,37 +1011,23 @@ static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs,
 		goto reset_umask;
 	}
 
-	if (can_use_mount_api()) {
-		fd_fs = fs_prepare("tmpfs", -EBADF, "", 0, 0);
-		if (fd_fs < 0)
-			return log_error_errno(-errno, errno, "Failed to prepare filesystem context for tmpfs");
+	fd_fs = fs_prepare("tmpfs", -EBADF, "", 0, 0);
+	if (fd_fs < 0)
+		return log_error_errno(-errno, errno, "Failed to prepare filesystem context for tmpfs");
 
-		sprintf(mount_options, "%zu", tmpfs_size);
+	sprintf(mount_options, "%zu", tmpfs_size);
 
-		ret = fs_set_property(fd_fs, "mode", "0755");
-		if (ret < 0)
-			return log_error_errno(-errno, errno, "Failed to mount tmpfs onto %d(dev)", fd_fs);
-
-		ret = fs_set_property(fd_fs, "size", mount_options);
-		if (ret < 0)
-			return log_error_errno(-errno, errno, "Failed to mount tmpfs onto %d(dev)", fd_fs);
-
-		ret = fs_attach(fd_fs, rootfs->dfd_mnt, "dev",
-				PROTECT_OPATH_DIRECTORY,
-				PROTECT_LOOKUP_BENEATH_XDEV, 0);
-	} else {
-		__do_free char *fallback_path = NULL;
+	ret = fs_set_property(fd_fs, "mode", "0755");
+	if (ret < 0)
+		return log_error_errno(-errno, errno, "Failed to mount tmpfs onto %d(dev)", fd_fs);
 
-		sprintf(mount_options, "size=%zu,mode=755", tmpfs_size);
-		DEBUG("Using mount options: %s", mount_options);
+	ret = fs_set_property(fd_fs, "size", mount_options);
+	if (ret < 0)
+		return log_error_errno(-errno, errno, "Failed to mount tmpfs onto %d(dev)", fd_fs);
 
-		if (path) {
-			fallback_path = must_make_path(path, "/dev", NULL);
-			ret = safe_mount("none", fallback_path, "tmpfs", 0, mount_options, path);
-		} else {
-			ret = safe_mount("none", "dev", "tmpfs", 0, mount_options, NULL);
-		}
-	}
+	ret = fs_attach(fd_fs, rootfs->dfd_mnt, "dev",
+			PROTECT_OPATH_DIRECTORY,
+			PROTECT_LOOKUP_BENEATH_XDEV, 0);
 	if (ret < 0) {
 		SYSERROR("Failed to mount tmpfs on \"%s\"", path);
 		goto reset_umask;
@@ -1160,35 +1140,16 @@ static int lxc_fill_autodev(struct lxc_rootfs *rootfs)
 		if (ret < 0)
 			return ret_errno(EIO);
 
-		if (can_use_mount_api()) {
-			ret = fd_bind_mount(rootfs->dfd_host, rootfs->buf,
-					    PROTECT_OPATH_FILE,
-					    PROTECT_LOOKUP_BENEATH_XDEV,
-					    rootfs->dfd_dev, device->name,
-					    PROTECT_OPATH_FILE,
-					    PROTECT_LOOKUP_BENEATH,
-					    0,
-					    0,
-					    0,
-					    false);
-		} else {
-			char path[PATH_MAX];
-
-			ret = strnprintf(rootfs->buf, sizeof(rootfs->buf), "/dev/%s", device->name);
-			if (ret < 0)
-				return ret_errno(EIO);
-
-			ret = strnprintf(path, sizeof(path), "%s/dev/%s", get_rootfs_mnt(rootfs), device->name);
-			if (ret < 0)
-				return log_error(-1, "Failed to create device path for %s", device->name);
-
-			ret = safe_mount(rootfs->buf, path, 0, MS_BIND, NULL, get_rootfs_mnt(rootfs));
-			if (ret < 0)
-				return log_error_errno(-1, errno, "Failed to bind mount host device node \"%s\" to \"%s\"", rootfs->buf, path);
-
-			DEBUG("Bind mounted host device node \"%s\" to \"%s\"", rootfs->buf, path);
-			continue;
-		}
+		ret = fd_bind_mount(rootfs->dfd_host, rootfs->buf,
+				    PROTECT_OPATH_FILE,
+				    PROTECT_LOOKUP_BENEATH_XDEV,
+				    rootfs->dfd_dev, device->name,
+				    PROTECT_OPATH_FILE,
+				    PROTECT_LOOKUP_BENEATH,
+				    0,
+				    0,
+				    0,
+				    false);
 		DEBUG("Bind mounted host device %d(%s) to %d(%s)", rootfs->dfd_host, rootfs->buf, rootfs->dfd_dev, device->name);
 	}
 	(void)umask(cmask);
@@ -1496,104 +1457,48 @@ static int lxc_setup_devpts_child(struct lxc_handler *handler)
 	if (ret < 0 && errno != EEXIST)
 		return log_error_errno(-1, errno, "Failed to create \"/dev/pts\" directory");
 
-	if (can_use_mount_api()) {
-		fd_fs = fs_prepare("devpts", -EBADF, "", 0, 0);
-		if (fd_fs < 0)
-			return syserror("Failed to prepare filesystem context for devpts");
-
-		ret = fs_set_property(fd_fs, "source", "devpts");
-		if (ret < 0)
-			SYSTRACE("Failed to set \"source=devpts\" on devpts filesystem context %d", fd_fs);
-
-		ret = fs_set_property(fd_fs, "gid", "5");
-		if (ret < 0)
-			SYSTRACE("Failed to set \"gid=5\" on devpts filesystem context %d", fd_fs);
-
-		ret = fs_set_flag(fd_fs, "newinstance");
-		if (ret < 0)
-			return syserror("Failed to set \"newinstance\" property on devpts filesystem context %d", fd_fs);
-
-		ret = fs_set_property(fd_fs, "ptmxmode", "0666");
-		if (ret < 0)
-			return syserror("Failed to set \"ptmxmode=0666\" property on devpts filesystem context %d", fd_fs);
-
-		ret = fs_set_property(fd_fs, "mode", "0620");
-		if (ret < 0)
-			return syserror("Failed to set \"mode=0620\" property on devpts filesystem context %d", fd_fs);
+	fd_fs = fs_prepare("devpts", -EBADF, "", 0, 0);
+	if (fd_fs < 0)
+		return syserror("Failed to prepare filesystem context for devpts");
 
-		ret = fs_set_property(fd_fs, "max", fdstr(pty_max));
-		if (ret < 0)
-			return syserror("Failed to set \"max=%zu\" property on devpts filesystem context %d", conf->pty_max, fd_fs);
-
-		ret = fsconfig(fd_fs, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
-		if (ret < 0)
-			return syserror("Failed to finalize filesystem context %d", fd_fs);
-
-		devpts_fd = fsmount(fd_fs, FSMOUNT_CLOEXEC, MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC);
-		if (devpts_fd < 0)
-			return syserror("Failed to create new mount for filesystem context %d", fd_fs);
-		TRACE("Created detached devpts mount %d", devpts_fd);
-
-		ret = move_mount(devpts_fd, "", rootfs->dfd_dev, "pts", MOVE_MOUNT_F_EMPTY_PATH);
-		if (ret)
-			return syserror("Failed to attach devpts mount %d to %d/pts", conf->devpts_fd, rootfs->dfd_dev);
-
-		DEBUG("Attached detached devpts mount %d to %d/pts", devpts_fd, rootfs->dfd_dev);
-	} else {
-		char **opts;
-		char devpts_mntopts[256];
-		char *mntopt_sets[5];
-		char default_devpts_mntopts[256] = "gid=5,newinstance,ptmxmode=0666,mode=0620";
-
-		/*
-		 * Fallback codepath in case the new mount API can't be used to
-		 * create detached mounts.
-		 */
-
-		ret = strnprintf(devpts_mntopts, sizeof(devpts_mntopts), "%s,max=%zu",
-				default_devpts_mntopts, pty_max);
-		if (ret < 0)
-			return -1;
+	ret = fs_set_property(fd_fs, "source", "devpts");
+	if (ret < 0)
+		SYSTRACE("Failed to set \"source=devpts\" on devpts filesystem context %d", fd_fs);
 
-		/* Create mountpoint for devpts instance. */
-		ret = mkdirat(rootfs->dfd_dev, "pts", 0755);
-		if (ret < 0 && errno != EEXIST)
-			return log_error_errno(-1, errno, "Failed to create \"/dev/pts\" directory");
+	ret = fs_set_property(fd_fs, "gid", "5");
+	if (ret < 0)
+		SYSTRACE("Failed to set \"gid=5\" on devpts filesystem context %d", fd_fs);
 
-		/* gid=5 && max= */
-		mntopt_sets[0] = devpts_mntopts;
+	ret = fs_set_flag(fd_fs, "newinstance");
+	if (ret < 0)
+		return syserror("Failed to set \"newinstance\" property on devpts filesystem context %d", fd_fs);
 
-		/* !gid=5 && max= */
-		mntopt_sets[1] = devpts_mntopts + STRLITERALLEN("gid=5") + 1;
+	ret = fs_set_property(fd_fs, "ptmxmode", "0666");
+	if (ret < 0)
+		return syserror("Failed to set \"ptmxmode=0666\" property on devpts filesystem context %d", fd_fs);
 
-		/* gid=5 && !max= */
-		mntopt_sets[2] = default_devpts_mntopts;
+	ret = fs_set_property(fd_fs, "mode", "0620");
+	if (ret < 0)
+		return syserror("Failed to set \"mode=0620\" property on devpts filesystem context %d", fd_fs);
 
-		/* !gid=5 && !max= */
-		mntopt_sets[3] = default_devpts_mntopts + STRLITERALLEN("gid=5") + 1;
+	ret = fs_set_property(fd_fs, "max", fdstr(pty_max));
+	if (ret < 0)
+		return syserror("Failed to set \"max=%zu\" property on devpts filesystem context %d", conf->pty_max, fd_fs);
 
-		/* end */
-		mntopt_sets[4] = NULL;
+	ret = fsconfig(fd_fs, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
+	if (ret < 0)
+		return syserror("Failed to finalize filesystem context %d", fd_fs);
 
-		for (ret = -1, opts = mntopt_sets; opts && *opts; opts++) {
-			/* mount new devpts instance */
-			ret = mount_at(rootfs->dfd_dev, "", 0,
-				       rootfs->dfd_dev, "pts", PROTECT_LOOKUP_BENEATH,
-				       "devpts", MS_NOSUID | MS_NOEXEC, *opts);
-			if (ret == 0)
-				break;
-		}
-		if (ret < 0)
-			return log_error_errno(-1, errno, "Failed to mount new devpts instance");
+	devpts_fd = fsmount(fd_fs, FSMOUNT_CLOEXEC, MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC);
+	if (devpts_fd < 0)
+		return syserror("Failed to create new mount for filesystem context %d", fd_fs);
+	TRACE("Created detached devpts mount %d", devpts_fd);
 
-		devpts_fd = open_at(rootfs->dfd_dev, "pts", PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, 0);
-		if (devpts_fd < 0) {
-			devpts_fd = -EBADF;
-			TRACE("Failed to create detached devpts mount");
-		}
+	ret = move_mount(devpts_fd, "", rootfs->dfd_dev, "pts", MOVE_MOUNT_F_EMPTY_PATH);
+	if (ret)
+		return syserror("Failed to attach devpts mount %d to %d/pts", conf->devpts_fd, rootfs->dfd_dev);
 
-		DEBUG("Mounted new devpts instance with options \"%s\"", *opts);
-	}
+	DEBUG("Attached detached devpts mount %d to %d/pts", devpts_fd, rootfs->dfd_dev);
 
 	handler->conf->devpts_fd = move_fd(devpts_fd);
 
@@ -1739,10 +1644,7 @@ static int bind_mount_console(int fd_devpts, struct lxc_rootfs *rootfs,
 	 * Note, there are intentionally no open or lookup restrictions since
 	 * we're operating directly on the fd.
 	 */
-	if (can_use_mount_api())
-		return fd_bind_mount(fd_pty, "", 0, 0, fd_to, "", 0, 0, 0, 0, 0, false);
-
-	return mount_fd(fd_pty, fd_to, "none", MS_BIND, 0);
+	return fd_bind_mount(fd_pty, "", 0, 0, fd_to, "", 0, 0, 0, 0, 0, false);
 }
 
 static int lxc_setup_dev_console(int fd_devpts, struct lxc_rootfs *rootfs,
@@ -1871,21 +1773,18 @@ static int lxc_setup_ttydir_console(int fd_devpts, struct lxc_rootfs *rootfs,
 		return syserror("Failed to open \"%d/console\"", fd_ttydir);
 
 	/* bind mount '/dev/<ttydir>/console' to '/dev/console' */
-	if (can_use_mount_api())
-		ret = fd_bind_mount(fd_dev_console,
-				    "",
-				    PROTECT_OPATH_FILE,
-				    PROTECT_LOOKUP_BENEATH_XDEV,
-				    fd_reg_console,
-				    "",
-				    PROTECT_OPATH_FILE,
-				    PROTECT_LOOKUP_BENEATH,
-				    0,
-				    0,
-				    0,
-				    false);
-	else
-		ret = mount_fd(fd_dev_console, fd_reg_console, "none", MS_BIND, 0);
+	ret = fd_bind_mount(fd_dev_console,
+			    "",
+			    PROTECT_OPATH_FILE,
+			    PROTECT_LOOKUP_BENEATH_XDEV,
+			    fd_reg_console,
+			    "",
+			    PROTECT_OPATH_FILE,
+			    PROTECT_LOOKUP_BENEATH,
+			    0,
+			    0,
+			    0,
+			    false);
 	if (ret < 0)
 		return syserror("Failed to mount \"%d\" on \"%d\"",
 				fd_dev_console, fd_reg_console);

From 4380d21eb05833ba9878dc0d476e2ff48d1c109e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber@stgraber.org>
Date: Wed, 22 Apr 2026 17:35:02 -0400
Subject: [PATCH 13/29] Merge pull request #4672 from
 mihalicyn/assume_new_enough_kernel

assume CLONE_PIDFD, clone3, new mount api are supported

From 2f60da605485d022b43b127123c08e6425a64ac6 Mon Sep 17 00:00:00 2001
From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Date: Thu, 25 Jul 2024 16:49:44 +0200
Subject: [PATCH 14/29] apparmor: allow nosymfollow remounts

We need this for new versions of systemd, because it heavily uses
MS_NOSYMFOLLOW these days.

Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
---
 src/lxc/lsm/apparmor.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c
index 9f31840ff7..1a53f3b1eb 100644
--- a/src/lxc/lsm/apparmor.c
+++ b/src/lxc/lsm/apparmor.c
@@ -689,6 +689,7 @@ static const struct mntopt_t {
 	{ ",nodev", sizeof(",nodev")-1 },
 	{ ",nosuid", sizeof(",nosuid")-1 },
 	{ ",noexec", sizeof(",noexec")-1 },
+	{ ",nosymfollow", sizeof(",nosymfollow")-1 },
 };
 
 static void append_remount_rule(char **profile, size_t *size, const char *rule)

From 0153a7855b29d592ebe0f12a4b7635390b45aa14 Mon Sep 17 00:00:00 2001
From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Date: Thu, 25 Jul 2024 16:57:13 +0200
Subject: [PATCH 15/29] apparmor: allow nosymfollow remounts

Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
---
 config/apparmor/abstractions/container-base.in | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/config/apparmor/abstractions/container-base.in b/config/apparmor/abstractions/container-base.in
index 87982fda3d..c7fc0d4cf7 100644
--- a/config/apparmor/abstractions/container-base.in
+++ b/config/apparmor/abstractions/container-base.in
@@ -128,6 +128,24 @@
   mount options=(ro,remount,bind,noexec,nodev),
   mount options=(ro,remount,bind,nodev,nosuid),
   mount options=(ro,remount,bind,nosuid,noexec,nodev),
+  mount options=(ro,remount,bind,noatime),
+  mount options=(ro,remount,bind,noatime,nodev),
+  mount options=(ro,remount,bind,noatime,noexec),
+  mount options=(ro,remount,bind,noatime,nosuid),
+  mount options=(ro,remount,bind,noatime,noexec,nodev),
+  mount options=(ro,remount,bind,noatime,nosuid,nodev),
+  mount options=(ro,remount,bind,noatime,nosuid,noexec),
+  mount options=(ro,remount,bind,noatime,nosuid,noexec,nodev),
+  mount options=(ro,remount,bind,nosuid,noexec,strictatime),
+  mount options=(ro,remount,nosuid,noexec,strictatime),
+  mount options=(ro,remount,bind,nosymfollow),
+  mount options=(ro,remount,bind,nosymfollow,nodev),
+  mount options=(ro,remount,bind,nosymfollow,noexec),
+  mount options=(ro,remount,bind,nosymfollow,nosuid),
+  mount options=(ro,remount,bind,nosymfollow,noexec,nodev),
+  mount options=(ro,remount,bind,nosymfollow,nosuid,nodev),
+  mount options=(ro,remount,bind,nosymfollow,nosuid,noexec),
+  mount options=(ro,remount,bind,nosymfollow,nosuid,noexec,nodev),
 
   # allow moving mounts except for /proc, /sys and /dev
   mount options=(rw,move) /[^spd]*{,/**},

From 116abab180e50d06d5b048888990cfff6449444e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber@stgraber.org>
Date: Thu, 23 Apr 2026 10:03:00 -0400
Subject: [PATCH 16/29] Merge pull request #4466 from
 mihalicyn/apparmor_nosymfollow

apparmor: allow nosymfollow remounts

From f1540aaf0d290d057512369d64eeb2a9c0159445 Mon Sep 17 00:00:00 2001
From: Fernando Picazo <fernando.picazo@outlook.com>
Date: Fri, 5 Sep 2025 22:33:36 -0500
Subject: [PATCH 17/29] lsm/apparmor: allow binfmt_misc RW mounts

Signed-off-by: Fernando Picazo <fernando.picazo@outlook.com>
[ alex: fully reworked to match logic in Incus ]
Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
---
 src/lxc/lsm/apparmor.c | 44 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 41 insertions(+), 3 deletions(-)

diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c
index 1a53f3b1eb..256f8c12cb 100644
--- a/src/lxc/lsm/apparmor.c
+++ b/src/lxc/lsm/apparmor.c
@@ -84,9 +84,23 @@ static const char AA_PROFILE_BASE[] =
 "  # deny access under /proc/bus to avoid e.g. messing with pci devices directly\n"
 "  deny @{PROC}/bus/** wklx,\n"
 "\n"
-"  # deny writes in /proc/sys/fs but allow binfmt_misc to be mounted\n"
+"  # allow binfmt_misc to be mounted\n"
 "  mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/,\n"
-"  deny @{PROC}/sys/fs/** wklx,\n"
+"\n"
+"  # deny writes in /proc/sys/fs except /proc/sys/fs/binfmt_misc\n"
+"  deny @{PROC}/sys/fs/[^b]*{,/**} wklx,\n"
+"  deny @{PROC}/sys/fs/b[^i]*{,/**} wklx,\n"
+"  deny @{PROC}/sys/fs/bi[^n]*{,/**} wklx,\n"
+"  deny @{PROC}/sys/fs/bin[^f]*{,/**} wklx,\n"
+"  deny @{PROC}/sys/fs/binf[^m]*{,/**} wklx,\n"
+"  deny @{PROC}/sys/fs/binfm[^t]*{,/**} wklx,\n"
+"  deny @{PROC}/sys/fs/binfmt[^_]*{,/**} wklx,\n"
+"  deny @{PROC}/sys/fs/binfmt_[^m]*{,/**} wklx,\n"
+"  deny @{PROC}/sys/fs/binfmt_m[^i]*{,/**} wklx,\n"
+"  deny @{PROC}/sys/fs/binfmt_mi[^s]*{,/**} wklx,\n"
+"  deny @{PROC}/sys/fs/binfmt_mis[^c]*{,/**} wklx,\n"
+"  deny @{PROC}/sys/fs/binfmt_misc?*{,/**} wklx,\n"
+"  deny @{PROC}/sys/fs?*{,/**} wklx,\n"
 "\n"
 "  # allow efivars to be mounted, writing to it will be blocked though\n"
 "  mount fstype=efivarfs -> /sys/firmware/efi/efivars/,\n"
@@ -172,10 +186,28 @@ static const char AA_PROFILE_BASE[] =
 "  mount options=(rw,move) /sys?*{,/**},\n"
 "\n";
 
+static const char AA_PROFILE_BASE_PRIVILEGED[] =
+"  deny /proc/sys/fs/binfmt_misc/{,**} wklx,\n"
+"\n";
+
 static const char AA_PROFILE_BASE_NO_NESTING[] =
 "\n"
 "  # generated by: lxc-generate-aa-rules.py container-rules.base\n"
-"  deny /proc/sys/[^kn]*{,/**} wklx,\n"
+"  deny /proc/sys/[^fkn]*{,/**} wklx,\n"
+"  deny /proc/sys/f[^s]*{,/**} wklx,\n"
+"  deny /proc/sys/fs/[^b]*{,/**} wklx,\n"
+"  deny /proc/sys/fs/b[^i]*{,/**} wklx,\n"
+"  deny /proc/sys/fs/bi[^n]*{,/**} wklx,\n"
+"  deny /proc/sys/fs/bin[^f]*{,/**} wklx,\n"
+"  deny /proc/sys/fs/binf[^m]*{,/**} wklx,\n"
+"  deny /proc/sys/fs/binfm[^t]*{,/**} wklx,\n"
+"  deny /proc/sys/fs/binfmt[^_]*{,/**} wklx,\n"
+"  deny /proc/sys/fs/binfmt_[^m]*{,/**} wklx,\n"
+"  deny /proc/sys/fs/binfmt_m[^i]*{,/**} wklx,\n"
+"  deny /proc/sys/fs/binfmt_mi[^s]*{,/**} wklx,\n"
+"  deny /proc/sys/fs/binfmt_mis[^c]*{,/**} wklx,\n"
+"  deny /proc/sys/fs/binfmt_misc?*{,/**} wklx,\n"
+"  deny /proc/sys/fs?*{,/**} wklx,\n"
 "  deny /proc/sys/k[^e]*{,/**} wklx,\n"
 "  deny /proc/sys/ke[^r]*{,/**} wklx,\n"
 "  deny /proc/sys/ker[^n]*{,/**} wklx,\n"
@@ -338,6 +370,8 @@ static const char AA_PROFILE_UNPRIVILEGED[] =
 "  ### Configuration: unprivileged container\n"
 "  pivot_root,\n"
 "\n"
+"  mount fstype=binfmt_misc,\n"
+"\n"
 "  # Allow modifying mount propagation\n"
 "  mount options=(rw,make-slave) -> /{,**},\n"
 "  mount options=(rw,make-rslave) -> /{,**},\n"
@@ -759,6 +793,10 @@ static char *get_apparmor_profile_content(struct lsm_ops *ops, struct lxc_conf *
 	must_append_sized(&profile, &size, AA_PROFILE_BASE,
 	                  STRARRAYLEN(AA_PROFILE_BASE));
 
+	if (is_privileged(conf))
+		must_append_sized(&profile, &size, AA_PROFILE_BASE_PRIVILEGED,
+				  STRARRAYLEN(AA_PROFILE_BASE_PRIVILEGED));
+
 	if (!conf->lsm_aa_allow_nesting)
 		must_append_sized(&profile, &size, AA_PROFILE_BASE_NO_NESTING,
 		                  STRARRAYLEN(AA_PROFILE_BASE_NO_NESTING));

From 282b60e60fccfd91c5e60325164ad2d0dfd6f352 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber@stgraber.org>
Date: Thu, 23 Apr 2026 11:34:19 -0400
Subject: [PATCH 18/29] Merge pull request #4673 from mihalicyn/binfmt_rw_mount

lsm/apparmor: allow binfmt_misc RW mounts

From 8c92f4347114674c0889005ff2240503e9d35b1e Mon Sep 17 00:00:00 2001
From: Mathias Gibbens <gibmat@debian.org>
Date: Sat, 25 Apr 2026 20:11:53 +0000
Subject: [PATCH 19/29] tests/lxc-test-lxc-attach: Increase sleep time

On riscv64 architectures, a single second sleep doesn't appear to be
sufficient to work around the busybox pipe closure bug, and the test
hangs forever. Increase to three seconds.

Signed-off-by: Mathias Gibbens <gibmat@debian.org>
---
 src/tests/lxc-test-lxc-attach | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tests/lxc-test-lxc-attach b/src/tests/lxc-test-lxc-attach
index 720545f994..75eaa8a92d 100755
--- a/src/tests/lxc-test-lxc-attach
+++ b/src/tests/lxc-test-lxc-attach
@@ -221,7 +221,7 @@ busybox tee --help >/dev/null 2>&1 || FAIL "missing busybox's tee applet"
 
 out=$(mktemp /tmp/out_XXXX)
 BS=1000000
-( sleep 3; echo "echo DATASTART ; dd if=/dev/urandom bs=$BS count=1 status=none | hexdump | tee /root/large-data.txt ; echo DATAEND" ; sleep 1 ) | \
+( sleep 3; echo "echo DATASTART ; dd if=/dev/urandom bs=$BS count=1 status=none | hexdump | tee /root/large-data.txt ; echo DATAEND" ; sleep 3 ) | \
 	script -q -e -c "lxc-attach -n busy -l trace -o \"${ATTACH_LOG}\"" | \
 	sed -n '/DATASTART/,/DATAEND/{/DATASTART/d;/DATAEND/d;s/[\r\n]*$//;p}' > $out
 

From 16f6c890332be3fe97c51c5c91a07634c79ae283 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber@stgraber.org>
Date: Sat, 25 Apr 2026 16:28:41 -0400
Subject: [PATCH 20/29] Merge pull request #4674 from
 gibmat/extend-test-sleep-riscv64

tests/lxc-test-lxc-attach: Increase sleep time

From 071cd4c60588d5a36e9dac4b820d3e858a3a7512 Mon Sep 17 00:00:00 2001
From: Serge Hallyn <serge@hallyn.com>
Date: Sun, 26 Apr 2026 22:15:41 +0200
Subject: [PATCH 21/29] lvm.c: make sure tp gets freed

tp is __do_free.  However, when we detect that it is not a thinpool,
we set it to NULL without freeing it, so it can't get freed on exit.

coverity id 1461741

Signed-off-by: Serge Hallyn <serge@hallyn.com>
---
 src/lxc/storage/lvm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/lxc/storage/lvm.c b/src/lxc/storage/lvm.c
index d563144518..dbc1ccf9fa 100644
--- a/src/lxc/storage/lvm.c
+++ b/src/lxc/storage/lvm.c
@@ -137,6 +137,7 @@ static int do_lvm_create(const char *path, uint64_t size, const char *thinpool)
 			return log_error(-EINVAL, "Failed to detect whether \"%s\" is a thinpool", tp);
 		} else if (!ret) {
 			TRACE("Detected that \"%s\" is not a thinpool", tp);
+			free(tp);
 			tp = NULL;
 		} else {
 			TRACE("Detected \"%s\" is a thinpool", tp);

From 7b1a5eab2da1dc5cfc80e7c11c876155d15a4e60 Mon Sep 17 00:00:00 2001
From: Serge Hallyn <serge@hallyn.com>
Date: Sun, 26 Apr 2026 22:27:59 +0200
Subject: [PATCH 22/29] Don't leak an open fd

The dfd_idmapped was being dup'd, but never closed.

If we ever change it so that storage_put closes the dfd_idmapped
fd, then we'll want to un-do this.  For now, this is a kludgy way
to avoid leaking the open fd, but should work.

The new_rootfs->dfd_idmapped gets dup'd from
c->lxc_conf->rootfs.dfd_idmapped.  new_rootfs eventually gets
assigned to new->rootfs (where new is a struct storage, usually
called 'bdev').  From here there are error paths which free the
bdev and return NULL, and a success path that returns bdev.  But
neither the error path nor the caller do anything really with the
bdev, and storage_put() doesn't close that fd.

So close the dfd_idmapped in both paths.

Coverity id: 1641426

Signed-off-by: Serge Hallyn <serge@hallyn.com>
---
 src/lxc/storage/storage.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/lxc/storage/storage.c b/src/lxc/storage/storage.c
index 497cad7882..2490d3c333 100644
--- a/src/lxc/storage/storage.c
+++ b/src/lxc/storage/storage.c
@@ -516,6 +516,8 @@ struct lxc_storage *storage_copy(struct lxc_container *c, const char *cname,
 	}
 
 on_success:
+	/* The only caller, copy_storage, doesn't ever close this. */
+	close_prot_errno_disarm(new_rootfs.dfd_idmapped);
 	lxc_storage_put(c->lxc_conf);
 
 	return new;
@@ -524,6 +526,7 @@ struct lxc_storage *storage_copy(struct lxc_container *c, const char *cname,
 	storage_put(new);
 
 on_error_put_orig:
+	close_prot_errno_disarm(new_rootfs.dfd_idmapped);
 	lxc_storage_put(c->lxc_conf);
 
 	return NULL;

From da651f1d3c8ec8c6e73fd8ca1dccf4e8fcb5d20f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber@stgraber.org>
Date: Tue, 28 Apr 2026 10:11:56 -0400
Subject: [PATCH 23/29] Merge pull request #4677 from hallyn/2026-04-28/leakfd

Don't leak an open fd

From 35faadf2c82cb7b445d4e7087b45accd74b5b3f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber@stgraber.org>
Date: Tue, 28 Apr 2026 10:12:26 -0400
Subject: [PATCH 24/29] Merge pull request #4676 from hallyn/2026-04-28/tp

lvm.c: make sure tp gets freed

From 1974ca441b69cf4499c20575a61ec80813c2b0e9 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serge@hallyn.com>
Date: Mon, 20 Apr 2026 23:07:47 -0500
Subject: [PATCH 25/29] lxc-user-nic: clarify and fix

Some variable names were a bit confusing in find_line and cull_entries.
Rename and document, and fix the flows using these.

It's possible that a more maintainable approach, long term, would be
to break these up differently: have one function create a neat
in-memory data structure representing the files, and have the paths
currently using find_line and cull_entries peek into the data structures.
But I think this is pretty clear.

This fixes CVE-2026-39402

Signed-off-by: Serge E. Hallyn <serge@hallyn.com>
Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
---
 src/lxc/cmd/lxc_user_nic.c | 75 +++++++++++++++++++++++++++++---------
 1 file changed, 57 insertions(+), 18 deletions(-)

diff --git a/src/lxc/cmd/lxc_user_nic.c b/src/lxc/cmd/lxc_user_nic.c
index 98aedf8216..83fd84a185 100644
--- a/src/lxc/cmd/lxc_user_nic.c
+++ b/src/lxc/cmd/lxc_user_nic.c
@@ -374,19 +374,58 @@ static char *get_eow(char *s, char *e)
 	return s;
 }
 
+static bool same_word(const char *start, const char *end, const char *word)
+{
+	size_t wordlen = strlen(word);
+	size_t buflen = end - start;
+
+	if (wordlen != buflen)
+		return false;
+	if (strncmp(start, word, wordlen) == 0)
+		return true;
+	return false;
+}
+
+/*
+ * in:
+ * @buf_start and @buf_end point to the buffer to be read.
+ *
+ * @name is the name of the user who should own the link.
+ *
+ * @net_type is the type of connection, e.g. veth
+ *
+ * @net_link is the name of the bridge, e.g. lxcbr0, on which the
+ * device should live.
+ *
+ * @net_dev is the name of the device itself in the host netns.
+ *
+ * out:
+ * @is_owner is set to true if the current line is owned by @name.
+ *
+ * @nic_found is set to true if the line is specifically for the passed-in
+ * @net_dev, and it is on the right @net_link and of the right @net_type.
+ *
+ * @exists is set to false if the nic in this line no longer exists.  This is
+ * used by cull_entries(): if we set it to false, then this line will be
+ * removed from the LXC_USERNIC_DB (e.g. /var/run/lxc/nics).
+ */
 static char *find_line(char *buf_start, char *buf_end, char *name,
 		       char *net_type, char *net_link, char *net_dev,
-		       bool *owner, bool *found, bool *keep)
+		       bool *is_owner, bool *nic_found, bool *exists)
 {
 	char *end_of_line, *end_of_word, *line;
+	bool right_net_type, right_bridge, right_link_name;;
 
 	while (buf_start < buf_end) {
 		size_t len;
 		char netdev_name[IFNAMSIZ];
 
-		*found = false;
-		*keep = true;
-		*owner = false;
+		*nic_found = false;
+		*exists = true;
+		*is_owner = false;
+		right_net_type  = false;
+		right_bridge    = false;
+		right_link_name = false;
 
 		end_of_line = get_eol(buf_start, buf_end);
 		if (end_of_line >= buf_end)
@@ -405,11 +444,8 @@ static char *find_line(char *buf_start, char *buf_end, char *name,
 		if (!end_of_word)
 			return NULL;
 
-		if (strncmp(buf_start, name, strlen(name)))
-			*found = false;
-		else
-			if (strlen(name) == (size_t)(end_of_word - buf_start))
-				*owner = true;
+		if (same_word(buf_start, end_of_word, name))
+			*is_owner = true;
 
 		buf_start = end_of_word + 1;
 		while ((buf_start < buf_end) && isblank(*buf_start))
@@ -421,8 +457,8 @@ static char *find_line(char *buf_start, char *buf_end, char *name,
 		if (!end_of_word)
 			return NULL;
 
-		if (strncmp(buf_start, net_type, strlen(net_type)))
-			*found = false;
+		if (same_word(buf_start, end_of_word, net_type))
+			right_net_type = true;
 
 		buf_start = end_of_word + 1;
 		while ((buf_start < buf_end) && isblank(*buf_start))
@@ -434,8 +470,8 @@ static char *find_line(char *buf_start, char *buf_end, char *name,
 		if (!end_of_word)
 			return NULL;
 
-		if (strncmp(buf_start, net_link, strlen(net_link)))
-			*found = false;
+		if (same_word(buf_start, end_of_word, net_link))
+			right_bridge = true;
 
 		buf_start = end_of_word + 1;
 		while ((buf_start < buf_end) && isblank(*buf_start))
@@ -454,10 +490,13 @@ static char *find_line(char *buf_start, char *buf_end, char *name,
 
 		memcpy(netdev_name, buf_start, len);
 		netdev_name[len] = '\0';
-		*keep = lxc_nic_exists(netdev_name);
+		*exists = lxc_nic_exists(netdev_name);
 
 		if (net_dev && !strcmp(netdev_name, net_dev))
-			*found = true;
+			right_link_name = true;
+
+		if (right_net_type && right_bridge && right_link_name)
+			*nic_found = true;
 
 		return line;
 
@@ -587,7 +626,7 @@ static bool cull_entries(int fd, char *name, char *net_type, char *net_link,
 	size_t length = 0;
 	int ret;
 	char *buf_end, *buf_start;
-	bool found, keep;
+	bool nic_found, is_owner, keep;
 
 	ret = fd_to_buf(fd, &buf, &length);
 	if (ret < 0) {
@@ -603,7 +642,7 @@ static bool cull_entries(int fd, char *name, char *net_type, char *net_link,
 	buf_start = buf;
 	buf_end = buf + length;
 	while ((buf_start = find_line(buf_start, buf_end, name, net_type,
-				      net_link, net_dev, &(bool){true}, &found,
+				      net_link, net_dev, &is_owner, &nic_found,
 				      &keep))) {
 		struct entry_line *newe;
 
@@ -611,7 +650,7 @@ static bool cull_entries(int fd, char *name, char *net_type, char *net_link,
 		if (!newe)
 			return false;
 
-		if (found)
+		if (nic_found && is_owner)
 			*found_nicname = true;
 
 		entry_lines = newe;

From 20acae8e8fc8792f314fd276ebab29eba1206ee0 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serge@hallyn.com>
Date: Mon, 20 Apr 2026 23:08:17 -0500
Subject: [PATCH 26/29] usernic: add a test for ovs port deletion permission

Signed-off-by: Serge E. Hallyn <serge@hallyn.com>
Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
---
 src/tests/lxc-test-usernic-2.in | 129 ++++++++++++++++++++++++++++++++
 src/tests/meson.build           |   7 ++
 2 files changed, 136 insertions(+)
 create mode 100755 src/tests/lxc-test-usernic-2.in

diff --git a/src/tests/lxc-test-usernic-2.in b/src/tests/lxc-test-usernic-2.in
new file mode 100755
index 0000000000..567ac6e5a2
--- /dev/null
+++ b/src/tests/lxc-test-usernic-2.in
@@ -0,0 +1,129 @@
+#!/bin/bash
+
+# SPDX-License-Identifier: LGPL-2.1+
+
+# lxc: linux Container library
+#
+# This test verifies that lxc-user-nic can't be used by an
+# unprivileged user to delete another user's ovs nics.
+#
+# This test assumes an Ubuntu host
+
+DONE=0
+LXC_USER_NIC="@LIBEXECDIR@/lxc/lxc-user-nic"
+
+apt-get -y install openvswitch-switch
+
+run_cmd() {
+	u=$1
+	shift
+	sudo -i -u $u \
+	    env http_proxy=${http_proxy:-} https_proxy=${https_proxy:-} \
+	        XDG_RUNTIME_DIR=/run/user/$(id -u $u) ASAN_OPTIONS=${ASAN_OPTIONS:-} \
+		UBSAN_OPTIONS=${UBSAN_OPTIONS:-} $*
+}
+
+cleanup() {
+	set +e
+
+	(
+		run_cmd usernic-first "lxc-stop -n b1 -k"
+		run_cmd usernic-second "lxc-stop -n b1 -k"
+		sed -i '/usernic-first/d' /run/lxc/nics /etc/lxc/lxc-usernet
+		sed -i '/usernic-second/d' /run/lxc/nics /etc/lxc/lxc-usernet
+		ovs-vsctl del-br usernic-vs
+
+		pkill -u $(id -u usernic-first) -9
+		pkill -u $(id -u usernic-second) -9
+
+		rm -rf /tmp/usernic-test
+		rm -rf /home/usernic-first /run/user/$(id -u usernic-first)
+		rm -rf /home/usernic-second /run/user/$(id -u usernic-second)
+
+		deluser usernic-first
+		deluser usernic-second
+	) >/dev/null 2>&1
+
+	if [ "$DONE" = "1" ]; then
+		echo "PASS"
+		exit 0
+	fi
+
+	echo "FAIL"
+	exit 1
+}
+
+set -eux
+trap cleanup EXIT SIGHUP SIGINT SIGTERM
+
+# create a test user
+deluser usernic-first || true
+useradd usernic-first
+mkdir -p /home/usernic-first
+chown usernic-first: /home/usernic-first
+usermod -v 910000-919999 -w 910000-919999 usernic-first
+
+mkdir -p /home/usernic-first/.config/lxc/
+cat > /home/usernic-first/.config/lxc/default.conf << EOF
+lxc.net.0.type = veth
+lxc.net.0.link = usernic-vs
+lxc.net.0.flags = up
+lxc.idmap = u 0 910000 10000
+lxc.idmap = g 0 910000 10000
+EOF
+
+deluser usernic-second || true
+useradd usernic-second
+mkdir -p /home/usernic-second
+chown usernic-second: /home/usernic-second
+usermod -v 920000-929999 -w 920000-929999 usernic-second
+
+mkdir -p /home/usernic-second/.config/lxc/
+cat > /home/usernic-second/.config/lxc/default.conf << EOF
+lxc.net.0.type = veth
+lxc.net.0.link = usernic-vs
+lxc.net.0.flags = up
+lxc.idmap = u 0 920000 10000
+lxc.idmap = g 0 920000 10000
+lxc.apparmor.profile = lxc-container-default-with-nesting
+EOF
+
+mkdir -p /run/user/$(id -u usernic-first) /run/user/$(id -u usernic-second)
+chown -R usernic-first: /run/user/$(id -u usernic-first) /home/usernic-first
+chown -R usernic-second: /run/user/$(id -u usernic-second) /home/usernic-second
+
+ovs-vsctl add-br usernic-vs
+
+# Give each a quota of one nic on this bridge
+touch /etc/lxc/lxc-usernet
+sed -i '/^usernic-first/d' /etc/lxc/lxc-usernet
+sed -i '/^usernic-second/d' /etc/lxc/lxc-usernet
+echo "usernic-second veth usernic-vs 1" >> /etc/lxc/lxc-usernet
+echo "usernic-first veth usernic-vs 1" >> /etc/lxc/lxc-usernet
+
+run_cmd usernic-first "lxc-create -t busybox -n b1"
+run_cmd usernic-first "lxc-start -n b1 -d"
+run_cmd usernic-first "lxc-wait -n b1 -s RUNNING"
+p1=$(run_cmd usernic-first "lxc-info -n b1 -p -H")
+
+run_cmd usernic-second "lxc-create -t busybox -n b1"
+run_cmd usernic-second "lxc-start -n b1 -d"
+run_cmd usernic-second "lxc-wait -n b1 -s RUNNING"
+p2=$(run_cmd usernic-second "lxc-info -n b1 -p -H")
+
+ovs-vsctl list-ports usernic-vs
+n1=$(ovs-vsctl list-ports usernic-vs | wc -l)
+if [[ $n1 -ne 2 ]]; then
+	echo "wrong number of nics"
+	cleanup 1
+fi
+
+dev=$(grep usernic-first /run/lxc/nics | cut -f 4 -d\ )
+if run_cmd usernic-second \
+	"$LXC_USER_NIC delete xx xx /proc/$p2/ns/net veth usernic-vs $dev"; then
+	echo "FAIL: unpriv user could unlink another user's ovs port"
+	cleanup 1
+fi
+
+echo "All tests passed"
+DONE=1
diff --git a/src/tests/meson.build b/src/tests/meson.build
index 2b997b73f5..5fc7147fce 100644
--- a/src/tests/meson.build
+++ b/src/tests/meson.build
@@ -401,6 +401,13 @@ if want_tests
         input: 'lxc-test-usernic.in',
         output: 'lxc-test-usernic')
 
+    test_programs += configure_file(
+        configuration: conf,
+        install: true,
+        install_dir: bindir,
+        input: 'lxc-test-usernic-2.in',
+        output: 'lxc-test-usernic-2')
+
     test_programs += configure_file(
         configuration: dummy_config_data,
         install: true,

From 5c26ff09ccc48d5f9704aa38ede38afd77c640d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber@stgraber.org>
Date: Wed, 29 Apr 2026 18:15:10 -0400
Subject: [PATCH 27/29] Merge pull request #4678 from stgraber/security

Fix security issue with lxc-user-nic and OpenVswitch networks

From 623163302f427e7722e2c459b27a7b80cd092624 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber@stgraber.org>
Date: Thu, 30 Apr 2026 00:16:55 +0200
Subject: [PATCH 28/29] Release LXC 7.0.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber@stgraber.org>
---
 meson.build | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/meson.build b/meson.build
index ebbd560053..47f75e1b97 100644
--- a/meson.build
+++ b/meson.build
@@ -4,7 +4,7 @@
 project(
     'lxc',
     'c',
-    version: '6.0.0',
+    version: '7.0.0',
     license: 'LGPLv2+',
     default_options: [
         'b_lto=true',
@@ -26,14 +26,14 @@ liblxc_dependencies = []
 oss_fuzz_dependencies = []
 
 # Version.
-liblxc_version = '1.8.0'
+liblxc_version = '1.9.0'
 version_data = configuration_data()
-version_data.set('LXC_VERSION_MAJOR', '6')
+version_data.set('LXC_VERSION_MAJOR', '7')
 version_data.set('LXC_VERSION_MINOR', '0')
 version_data.set('LXC_VERSION_MICRO', '0')
 version_data.set('LXC_VERSION_BETA', '')
 version_data.set('LXC_ABI', liblxc_version)
-version_data.set('LXC_DEVEL', '1')
+version_data.set('LXC_DEVEL', '0')
 version_data.set('LXC_VERSION', meson.project_version())
 
 # Path handling.

From d787c1aa0d2547e9e9691663dbaef4282c4e7217 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber@stgraber.org>
Date: Tue, 2 Apr 2024 23:34:20 -0400
Subject: [PATCH 29/29] meson: Set DEVEL flag post release
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber@stgraber.org>
---
 meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index 47f75e1b97..2098dbfe74 100644
--- a/meson.build
+++ b/meson.build
@@ -33,7 +33,7 @@ version_data.set('LXC_VERSION_MINOR', '0')
 version_data.set('LXC_VERSION_MICRO', '0')
 version_data.set('LXC_VERSION_BETA', '')
 version_data.set('LXC_ABI', liblxc_version)
-version_data.set('LXC_DEVEL', '0')
+version_data.set('LXC_DEVEL', '1')
 version_data.set('LXC_VERSION', meson.project_version())
 
 # Path handling.