From 0ea6e166b4d34f8ae3eaa3d6b5ac3d9b371ef2ab Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Mon, 15 Jun 2026 17:01:45 -0400 Subject: [PATCH 1/9] feat: compute live container status instead of static DB column Resolves #350. Container status was a static column updated only by the create/reconfigure job scripts, so it drifted from reality whenever Proxmox changed out of band or a job died mid-run. This replaces it with status computed on demand. New utils/container-status.js resolves status from three sources: - Proxmox: does the LXC exist, and is it running/stopped? - Jobs: active create/reconfigure job, or did the last create job fail? - Config: does the live LXC config match what the API server expects? Resolved statuses: running, offline, creating, restarting, failed, missing, out-of-sync (plus unknown when Proxmox is unreachable). New endpoint: GET /api/v1/sites/:siteId/containers/:id/status -> { data: { status } } Non-breaking: the same live status is still embedded on the list/show/create payloads, so existing consumers keep working. The static Containers.status column is dropped (migration + model). All former writers were updated to stop persisting status; templates.js now selects provisioned containers by VMID + IPv4 instead of status='running'. The list page queries the per-container status endpoint (seeded by the list value for instant paint, polling while creating/restarting). OpenAPI and README updated. --- create-a-container/README.md | 18 +- create-a-container/bin/create-container.js | 31 +- create-a-container/bin/json-to-sql.js | 2 - .../bin/reconfigure-container.js | 17 +- create-a-container/client/src/lib/queries.ts | 5 + create-a-container/client/src/lib/types.ts | 22 +- .../pages/containers/ContainersListPage.tsx | 62 ++- .../20260615000000-remove-container-status.js | 26 ++ create-a-container/models/container.js | 5 - create-a-container/openapi.v1.yaml | 44 ++- .../routers/api/v1/containers.js | 60 ++- create-a-container/routers/api/v1/nodes.js | 1 - create-a-container/routers/templates.js | 14 +- create-a-container/utils/container-status.js | 355 ++++++++++++++++++ 14 files changed, 601 insertions(+), 61 deletions(-) create mode 100644 create-a-container/migrations/20260615000000-remove-container-status.js create mode 100644 create-a-container/utils/container-status.js diff --git a/create-a-container/README.md b/create-a-container/README.md index 4c135ef0..31fb3d2f 100644 --- a/create-a-container/README.md +++ b/create-a-container/README.md @@ -22,7 +22,6 @@ erDiagram int id PK string hostname UK "FQDN hostname" string username "Owner username" - string status "pending,creating,running,failed" string template "Template name" int creationJobId FK "References Job" int nodeId FK "References Node" @@ -231,6 +230,23 @@ Delete a container from both Proxmox and the database - `403` - User doesn't own the container - `500` - Proxmox API deletion failed or node not configured +#### `GET /api/v1/sites/:siteId/containers/:id/status` (Auth Required) +Return the **live** status of a container, computed on demand rather than read +from a stored column. The status is resolved by combining the container's +presence/run-state in Proxmox, any active create/reconfigure jobs, and whether +the live LXC config matches what the API server expects. +- **Returns**: `{ data: { status } }` where `status` is one of: + - `running` — online in Proxmox + - `offline` — exists in Proxmox but stopped + - `creating` — no Proxmox VM yet, active create job + - `restarting` — active reconfigure job + - `failed` — no Proxmox VM, last create job failed + - `missing` — no Proxmox VM, create succeeded or no create job found + - `out-of-sync` — exists in Proxmox but its config differs from expectation + - `unknown` — Proxmox unreachable / node has no API credentials +- **Note**: The same `status` value is also embedded on each container returned + by the list and show endpoints, so existing consumers remain non-breaking. + #### `GET /status/:jobId` (Auth Required) View container creation progress page diff --git a/create-a-container/bin/create-container.js b/create-a-container/bin/create-container.js index 68694e25..c7672e63 100755 --- a/create-a-container/bin/create-container.js +++ b/create-a-container/bin/create-container.js @@ -177,9 +177,14 @@ async function main() { console.error(`Container with ID ${containerId} not found`); process.exit(1); } - - if (container.status !== 'pending') { - console.error(`Container is not in pending status (current: ${container.status})`); + + // Guard against double-provisioning. The status column no longer exists, so + // we use the Proxmox VMID as the signal: once a VMID has been allocated and + // stored, creation has already run for this container. + if (container.containerId) { + console.error( + `Container already has a Proxmox VMID (${container.containerId}); refusing to re-create`, + ); process.exit(1); } @@ -217,10 +222,6 @@ async function main() { console.log(`Template type: ${isDocker ? 'Docker image' : 'Proxmox template'}`); try { - // Update status to 'creating' - await container.update({ status: 'creating' }); - console.log('Status updated to: creating'); - // Get the Proxmox API client const client = await node.api(); console.log('Proxmox API client initialized'); @@ -428,8 +429,7 @@ async function main() { console.log('Updating container record...'); await container.update({ macAddress, - ipv4Address, - status: 'running' + ipv4Address }); console.log('Container creation completed successfully!'); @@ -480,15 +480,10 @@ async function main() { if (err.response?.data) { console.error('API Error Details:', JSON.stringify(err.response.data, null, 2)); } - - // Update status to failed - try { - await container.update({ status: 'failed' }); - console.log('Status updated to: failed'); - } catch (updateErr) { - console.error('Failed to update container status:', updateErr.message); - } - + + // The container status is no longer persisted. Failure is recorded by the + // job-runner marking this job 'failure'; the live status resolver maps a + // missing-in-Proxmox container whose last create job failed to 'failed'. process.exit(1); } } diff --git a/create-a-container/bin/json-to-sql.js b/create-a-container/bin/json-to-sql.js index 94c02243..f55af4d5 100644 --- a/create-a-container/bin/json-to-sql.js +++ b/create-a-container/bin/json-to-sql.js @@ -222,7 +222,6 @@ async function run() { hostname, ipv4Address: obj.ip, username: obj.user || '', - status: 'running', template: obj.template || null, containerId: obj.ctid, macAddress: obj.mac, @@ -234,7 +233,6 @@ async function run() { await container.update({ ipv4Address: obj.ip, username: obj.user || '', - status: container.status || 'running', template: obj.template || container.template, containerId: obj.ctid, macAddress: obj.mac diff --git a/create-a-container/bin/reconfigure-container.js b/create-a-container/bin/reconfigure-container.js index ee5ba86e..6e31a248 100644 --- a/create-a-container/bin/reconfigure-container.js +++ b/create-a-container/bin/reconfigure-container.js @@ -160,14 +160,13 @@ async function main() { throw new Error('Could not get IP address from Proxmox interfaces API'); } - // Update container record with MAC/IP and running status + // Update container record with MAC/IP (status is no longer persisted) await container.update({ - status: 'running', macAddress, ipv4Address }); - console.log('Status updated to: running'); + console.log('Reconfiguration applied; container running'); console.log(` MAC: ${macAddress}`); console.log(` IP: ${ipv4Address}`); } @@ -181,15 +180,9 @@ async function main() { if (err.response?.data) { console.error('API Error Details:', JSON.stringify(err.response.data, null, 2)); } - - // Update status to failed - try { - await container.update({ status: 'failed' }); - console.log('Status updated to: failed'); - } catch (updateErr) { - console.error('Failed to update container status:', updateErr.message); - } - + + // Container status is no longer persisted. The job-runner records this job as + // 'failure'; live status is derived from Proxmox + job state on read. process.exit(1); } } diff --git a/create-a-container/client/src/lib/queries.ts b/create-a-container/client/src/lib/queries.ts index b261f2ec..546db6be 100644 --- a/create-a-container/client/src/lib/queries.ts +++ b/create-a-container/client/src/lib/queries.ts @@ -8,6 +8,7 @@ import type { Container, ContainerMetadata, ContainerNewBootstrap, + ContainerStatusResult, EffectiveResources, ExternalDomain, Group, @@ -29,6 +30,8 @@ export const keys = { containers: (siteId: number | string) => ['sites', String(siteId), 'containers'] as const, container: (siteId: number | string, id: number | string) => ['sites', String(siteId), 'containers', String(id)] as const, + containerStatus: (siteId: number | string, id: number | string) => + ['sites', String(siteId), 'containers', String(id), 'status'] as const, containerBootstrap: (siteId: number | string) => ['sites', String(siteId), 'containers', 'new'] as const, containerMetadata: (image: string) => ['container-metadata', image] as const, @@ -64,6 +67,8 @@ export const queries = { api.get(`/api/v1/sites/${siteId}/containers`), getContainer: (siteId: number | string, id: number | string) => api.get(`/api/v1/sites/${siteId}/containers/${id}`), + getContainerStatus: (siteId: number | string, id: number | string) => + api.get(`/api/v1/sites/${siteId}/containers/${id}/status`), containerBootstrap: (siteId: number | string) => api.get(`/api/v1/sites/${siteId}/containers/new`), containerMetadata: (siteId: number | string, image: string) => diff --git a/create-a-container/client/src/lib/types.ts b/create-a-container/client/src/lib/types.ts index 85701ab3..a32fd6cd 100644 --- a/create-a-container/client/src/lib/types.ts +++ b/create-a-container/client/src/lib/types.ts @@ -75,7 +75,7 @@ export interface Container { hostname: string; ipv4Address: string | null; macAddress: string | null; - status: string; + status: ContainerStatus; template: string | null; creationJobId: number | null; entrypoint: string | null; @@ -90,11 +90,29 @@ export interface Container { createdAt: string; } +/** + * Live container status resolved from Proxmox + job state + config drift. + * Returned by GET /sites/:id/containers/:id/status and embedded on Container. + */ +export type ContainerStatus = + | 'running' + | 'offline' + | 'creating' + | 'restarting' + | 'failed' + | 'missing' + | 'out-of-sync' + | 'unknown'; + +export interface ContainerStatusResult { + status: ContainerStatus; +} + export interface ContainerCreateResult { containerId: number; jobId: number; hostname: string; - status: string; + status: ContainerStatus; } export interface ContainerNewBootstrap { diff --git a/create-a-container/client/src/pages/containers/ContainersListPage.tsx b/create-a-container/client/src/pages/containers/ContainersListPage.tsx index ec5a195f..5f899d29 100644 --- a/create-a-container/client/src/pages/containers/ContainersListPage.tsx +++ b/create-a-container/client/src/pages/containers/ContainersListPage.tsx @@ -34,28 +34,76 @@ import { import { api, ApiError } from '@/lib/api'; import { useSession } from '@/lib/auth'; import { keys, queries } from '@/lib/queries'; -import type { Container } from '@/lib/types'; +import type { Container, ContainerStatus } from '@/lib/types'; type ViewMode = 'cards' | 'table'; const VIEW_STORAGE_KEY = 'containers:view'; -function statusVariant(s: string): 'default' | 'success' | 'warning' | 'danger' | 'secondary' { +function statusVariant( + s: ContainerStatus, +): 'default' | 'success' | 'warning' | 'danger' | 'secondary' { switch (s) { case 'running': return 'success'; - case 'pending': + case 'creating': case 'restarting': return 'warning'; case 'failed': - case 'error': + case 'out-of-sync': return 'danger'; - case 'stopped': + case 'offline': + case 'missing': return 'secondary'; default: return 'default'; } } +// Human-readable labels for the live status values. +const STATUS_LABELS: Record = { + running: 'Running', + offline: 'Offline', + creating: 'Creating', + restarting: 'Restarting', + failed: 'Failed', + missing: 'Missing', + 'out-of-sync': 'Out of sync', + unknown: 'Unknown', +}; + +// Statuses that are transitional and worth polling for. +const TRANSITIONAL_STATUSES: ReadonlySet = new Set([ + 'creating', + 'restarting', +]); + +/** + * Live status badge. Per issue #350 each row queries the dedicated + * /containers/:id/status endpoint. The status already returned by the list + * endpoint seeds initialData so the badge paints instantly, then this query + * keeps it fresh (polling while the container is in a transitional state). + */ +function ContainerStatusBadge({ + siteId, + container, +}: { + siteId: string | undefined; + container: Container; +}) { + const { data } = useQuery({ + queryKey: keys.containerStatus(siteId!, container.id), + queryFn: () => queries.getContainerStatus(siteId!, container.id), + enabled: !!siteId, + initialData: { status: container.status }, + refetchInterval: (query) => { + const status = query.state.data?.status; + return status && TRANSITIONAL_STATUSES.has(status) ? 4000 : false; + }, + }); + const status = data?.status ?? container.status; + return {STATUS_LABELS[status] ?? status}; +} + const linkClass = 'text-(--color-primary,#1d4ed8) hover:underline'; /** Shorten a full image ref to just its name+tag, e.g. ghcr.io/mieweb/base:latest -> base:latest */ @@ -330,7 +378,7 @@ export function ContainersListPage() { {c.hostname} - {c.status} +
@@ -374,7 +422,7 @@ export function ContainersListPage() { {c.hostname} - {c.status} + diff --git a/create-a-container/migrations/20260615000000-remove-container-status.js b/create-a-container/migrations/20260615000000-remove-container-status.js new file mode 100644 index 00000000..cdcdc72c --- /dev/null +++ b/create-a-container/migrations/20260615000000-remove-container-status.js @@ -0,0 +1,26 @@ +'use strict'; +/** @type {import('sequelize-cli').Migration} */ + +/** + * Remove the static `status` column from Containers. + * + * Container status is now computed live from Proxmox + job state + config drift + * (see utils/container-status.js and GET /sites/:id/containers/:id/status), so the + * persisted column is no longer a source of truth and is dropped. + * + * The down migration restores the column (defaulting to 'running') for rollback, + * but the historical per-container value cannot be recovered. + */ +module.exports = { + async up(queryInterface) { + await queryInterface.removeColumn('Containers', 'status'); + }, + + async down(queryInterface, Sequelize) { + await queryInterface.addColumn('Containers', 'status', { + type: Sequelize.STRING(20), + allowNull: false, + defaultValue: 'running', + }); + }, +}; diff --git a/create-a-container/models/container.js b/create-a-container/models/container.js index 721cebfe..b6bfc83e 100644 --- a/create-a-container/models/container.js +++ b/create-a-container/models/container.js @@ -268,11 +268,6 @@ module.exports = (sequelize, DataTypes) => { type: DataTypes.STRING(255), allowNull: false }, - status: { - type: DataTypes.STRING(20), - allowNull: false, - defaultValue: 'pending' - }, template: { type: DataTypes.STRING(255), allowNull: true diff --git a/create-a-container/openapi.v1.yaml b/create-a-container/openapi.v1.yaml index e049dd16..ec476381 100644 --- a/create-a-container/openapi.v1.yaml +++ b/create-a-container/openapi.v1.yaml @@ -89,7 +89,7 @@ components: id: { type: integer } hostname: { type: string } containerId: { type: integer, nullable: true } - status: { type: string } + status: { $ref: '#/components/schemas/ContainerStatus' } template: { type: string } ipv4Address: { type: string, nullable: true } macAddress: { type: string, nullable: true } @@ -104,6 +104,28 @@ components: services: type: array items: { type: object } + ContainerStatus: + type: string + description: >- + Live container status, resolved on read from Proxmox presence/run-state, + active jobs, and configuration drift. + running = online in Proxmox; + offline = exists in Proxmox but stopped; + creating = no Proxmox VM yet, active create job; + restarting = active reconfigure job; + failed = no Proxmox VM, last create job failed; + missing = no Proxmox VM, create succeeded or no create job; + out-of-sync = exists in Proxmox but config differs from expectation; + unknown = Proxmox unreachable / no node credentials. + enum: + - running + - offline + - creating + - restarting + - failed + - missing + - out-of-sync + - unknown Node: type: object properties: @@ -338,6 +360,26 @@ paths: get: { tags: [Containers], responses: { '200': { description: Container } } } put: { tags: [Containers], responses: { '200': { description: Updated, optional restart job } } } delete: { tags: [Containers], responses: { '200': { description: Deleted, with DNS cleanup warnings } } } + /sites/{siteId}/containers/{id}/status: + get: + tags: [Containers] + summary: Live container status (computed from Proxmox + jobs + config drift) + parameters: + - { in: path, name: siteId, required: true, schema: { type: integer } } + - { in: path, name: id, required: true, schema: { type: integer } } + responses: + '200': + description: Resolved container status + content: + application/json: + schema: + type: object + properties: + data: + type: object + properties: + status: { $ref: '#/components/schemas/ContainerStatus' } + '404': { description: Container not found } /sites/{siteId}/nodes: parameters: [{ in: path, name: siteId, required: true, schema: { type: integer } }] diff --git a/create-a-container/routers/api/v1/containers.js b/create-a-container/routers/api/v1/containers.js index 70c2947d..48a9c3bc 100644 --- a/create-a-container/routers/api/v1/containers.js +++ b/create-a-container/routers/api/v1/containers.js @@ -21,6 +21,11 @@ const { const { parseDockerRef, getImageConfig, extractImageMetadata } = require('../../../utils/docker-registry'); const { manageDnsRecords } = require('../../../utils/cloudflare-dns'); const { deleteVirtualMachine, withNetbox } = require('../../../utils/netbox'); +const { + computeContainerStatus, + computeContainerStatuses, + STATUS, +} = require('../../../utils/container-status'); const { apiAuth, asyncHandler, ok, created, ApiError } = require('../../../middlewares/api'); const router = express.Router({ mergeParams: true }); @@ -86,7 +91,7 @@ async function loadSite(req) { return site; } -function serializeContainer(c, site) { +function serializeContainer(c, site, status) { const services = c.services || []; const ssh = services.find( (s) => @@ -110,7 +115,9 @@ function serializeContainer(c, site) { hostname: c.hostname, ipv4Address: c.ipv4Address, macAddress: c.macAddress, - status: c.status, + // Live status computed from Proxmox + jobs + config (see utils/container-status). + // Kept on every container payload so existing consumers remain non-breaking. + status: status || STATUS.UNKNOWN, template: c.template, creationJobId: c.creationJobId, entrypoint: c.entrypoint, @@ -202,10 +209,41 @@ router.get( { association: 'dnsService' }, ], }, - { association: 'node', attributes: ['id', 'name', 'apiUrl'] }, + // Full node record (incl. credentials) is required to query live Proxmox status. + { association: 'node' }, ], }); - return ok(res, rows.map((c) => serializeContainer(c, site))); + // Resolve live statuses for the whole page in one pass: one Proxmox snapshot + // per node (shared), rather than N independent round-trips from the browser. + const statuses = await computeContainerStatuses(rows, Job); + return ok( + res, + rows.map((c) => serializeContainer(c, site, statuses.get(c.id))), + ); + }), +); + +// GET /containers/:id/status — live status for a single container +router.get( + '/:id/status', + asyncHandler(async (req, res) => { + const site = await loadSite(req); + const containerId = parseInt(req.params.id, 10); + if (!Number.isInteger(containerId) || containerId <= 0) { + throw new ApiError(404, 'not_found', 'Container not found'); + } + const c = await Container.findOne({ + where: { id: containerId, username: req.session.user }, + include: [ + { association: 'node' }, + { association: 'creationJob' }, + ], + }); + if (!c || !c.node || c.node.siteId !== site.id) { + throw new ApiError(404, 'not_found', 'Container not found'); + } + const status = await computeContainerStatus({ container: c, Job }); + return ok(res, { status }); }), ); @@ -233,12 +271,14 @@ router.get( ], }, { association: 'node' }, + { association: 'creationJob' }, ], }); if (!c || !c.node || c.node.siteId !== site.id) { throw new ApiError(404, 'not_found', 'Container not found'); } - return ok(res, serializeContainer(c, site)); + const status = await computeContainerStatus({ container: c, Job }); + return ok(res, serializeContainer(c, site, status)); }), ); @@ -296,7 +336,6 @@ router.post( { hostname, username: req.session.user, - status: 'pending', template: templateName, nodeId: node.id, siteId: site.id, @@ -371,7 +410,9 @@ router.post( containerId: container.id, jobId: job.id, hostname: container.hostname, - status: 'pending', + // A create job was just enqueued and there is no Proxmox VMID yet, so the + // live status resolver would report 'creating' — return it directly. + status: STATUS.CREATING, }); } catch (err) { await t.rollback(); @@ -414,16 +455,15 @@ router.put( const dnsWarnings = []; await sequelize.transaction(async (t) => { if (envChanged || entrypointChanged) { + // Persist the new desired config only. The "restarting" state is no + // longer stored — it is derived from the active reconfigure job below. await container.update( { environmentVars: envVarsJson, entrypoint: newEntrypoint, - status: needsRestart && container.containerId ? 'restarting' : container.status, }, { transaction: t }, ); - } else if (forceRestart && container.containerId) { - await container.update({ status: 'restarting' }, { transaction: t }); } if (needsRestart && container.containerId) { restartJob = await Job.create( diff --git a/create-a-container/routers/api/v1/nodes.js b/create-a-container/routers/api/v1/nodes.js index d01e097d..2cbb3303 100644 --- a/create-a-container/routers/api/v1/nodes.js +++ b/create-a-container/routers/api/v1/nodes.js @@ -247,7 +247,6 @@ router.post( containerId: c.vmid, macAddress, ipv4Address, - status: 'running', }; }), ); diff --git a/create-a-container/routers/templates.js b/create-a-container/routers/templates.js index 4793ba3a..4969dde5 100644 --- a/create-a-container/routers/templates.js +++ b/create-a-container/routers/templates.js @@ -1,9 +1,19 @@ const express = require('express'); +const { Op } = require('sequelize'); const { Site, Node, Container, Service, HTTPService, TransportService, ExternalDomain } = require('../models'); const { requireLocalhostOrAdmin } = require('../middlewares'); const router = express.Router(); +// A container is eligible for routing/DNS config once it has been provisioned, +// i.e. it has a Proxmox VMID and an assigned IPv4 address. The old code filtered +// on the (now-removed) static status column; presence of an IP/VMID is the stable, +// Proxmox-independent proxy for "this container is live enough to route to". +const PROVISIONED_CONTAINER_WHERE = { + containerId: { [Op.ne]: null }, + ipv4Address: { [Op.ne]: null }, +}; + async function loadDnsmasqSite(siteId) { return Site.findByPk(siteId, { include: [{ @@ -12,7 +22,7 @@ async function loadDnsmasqSite(siteId) { include: [{ model: Container, as: 'containers', - where: { status: 'running' }, + where: PROVISIONED_CONTAINER_WHERE, required: false, attributes: ['macAddress', 'ipv4Address', 'hostname'], }], @@ -40,7 +50,7 @@ router.get('/sites/:siteId/nginx', requireLocalhostOrAdmin, async (req, res) => include: [{ model: Container, as: 'containers', - where: { status: 'running' }, + where: PROVISIONED_CONTAINER_WHERE, required: false, include: [{ model: Service, diff --git a/create-a-container/utils/container-status.js b/create-a-container/utils/container-status.js new file mode 100644 index 00000000..735ec22b --- /dev/null +++ b/create-a-container/utils/container-status.js @@ -0,0 +1,355 @@ +/** + * container-status.js — resolve the *live* status of a container. + * + * Historically the container "status" was a static column in the database that + * was only mutated by the create/reconfigure job scripts. That value drifts out + * of reality whenever something changes a container directly in Proxmox (or when + * a job dies without updating the row). This module computes the real status on + * demand by combining three sources of truth: + * + * 1. Proxmox — does the LXC exist, and is it running or stopped? + * 2. Jobs — is there an active create/restart job, or did the last create + * job fail? + * 3. Config — does the live LXC config match what the API server expects? + * + * Resolved statuses (a strict superset of the old running/offline values): + * running — exists in Proxmox and is online + * offline — exists in Proxmox but is stopped + * restarting — has an active (pending/running) reconfigure job + * creating — not in Proxmox, but has an active (pending/running) create job + * failed — not in Proxmox, last create job returned failure + * missing — not in Proxmox, create job succeeded or no create job found + * out-of-sync — exists in Proxmox but its config doesn't match expectation + * unknown — Proxmox could not be reached / node has no API credentials + */ + +const { Op } = require('sequelize'); + +const STATUS = Object.freeze({ + RUNNING: 'running', + OFFLINE: 'offline', + CREATING: 'creating', + RESTARTING: 'restarting', + FAILED: 'failed', + MISSING: 'missing', + OUT_OF_SYNC: 'out-of-sync', + UNKNOWN: 'unknown', +}); + +const STATUS_VALUES = Object.freeze(Object.values(STATUS)); + +// Job command fragments that identify create vs reconfigure (restart) jobs. +// Jobs are distinguished solely by their `command` string (no type column). +const CREATE_CMD = 'bin/create-container.js'; +const RECONFIGURE_CMD = 'bin/reconfigure-container.js'; + +const ACTIVE_JOB_STATUSES = ['pending', 'running']; + +function nodeHasCreds(node) { + return !!(node && node.apiUrl && node.tokenId && node.secret); +} + +/** + * Find the most recent create job for a container. + * Prefers the explicit creationJobId FK; falls back to a command-string match. + * @param {object} container - Container instance (with optional creationJob assoc) + * @param {object} Job - Job model + * @returns {Promise} + */ +async function findLatestCreateJob(container, Job) { + if (container.creationJob) return container.creationJob; + if (container.creationJobId) { + return Job.findByPk(container.creationJobId); + } + return Job.findOne({ + where: { command: { [Op.like]: `%${CREATE_CMD} --container-id=${container.id}%` } }, + order: [['createdAt', 'DESC']], + }); +} + +/** + * Find the most recent reconfigure (restart) job for a container, if any. + * @param {object} container - Container instance + * @param {object} Job - Job model + * @returns {Promise} + */ +async function findLatestReconfigureJob(container, Job) { + return Job.findOne({ + where: { command: { [Op.like]: `%${RECONFIGURE_CMD} --container-id=${container.id}%` } }, + order: [['createdAt', 'DESC']], + }); +} + +function isActiveJob(job) { + return !!job && ACTIVE_JOB_STATUSES.includes(job.status); +} + +/** + * Parse a NUL-separated Proxmox env string ("K=V\0K2=V2") into an object. + * @param {string|null|undefined} envStr + * @returns {Object} + */ +function parseEnvString(envStr) { + const out = {}; + if (!envStr) return out; + for (const pair of String(envStr).split('\0')) { + if (!pair) continue; + const eq = pair.indexOf('='); + if (eq > 0) out[pair.substring(0, eq)] = pair.substring(eq + 1); + } + return out; +} + +function shallowEqualMap(a, b) { + const ak = Object.keys(a); + const bk = Object.keys(b); + if (ak.length !== bk.length) return false; + for (const k of ak) { + if (a[k] !== b[k]) return false; + } + return true; +} + +/** + * Compare the env/entrypoint the API server expects against the live LXC config. + * + * The expectation is built the same way the reconfigure job builds it + * (`container.buildLxcEnvConfig()`), so this mirrors exactly what *would* be sent + * to Proxmox. Env is compared as an unordered set of key=value pairs because the + * Proxmox API does not preserve ordering. Returns true when the live config + * matches the expectation (i.e. in sync). + * + * @param {object} container - Container instance (provides buildLxcEnvConfig) + * @param {object} liveConfig - Result of ProxmoxApi.lxcConfig(node, vmid) + * @returns {boolean} true if in sync, false if drifted + */ +function configMatches(container, liveConfig) { + const expected = + typeof container.buildLxcEnvConfig === 'function' ? container.buildLxcEnvConfig() : {}; + const expectedDeletes = new Set( + (expected.delete ? String(expected.delete).split(',') : []).map((s) => s.trim()), + ); + + // --- entrypoint --- + const liveEntrypoint = liveConfig?.entrypoint || null; + if (expectedDeletes.has('entrypoint')) { + if (liveEntrypoint) return false; // expected absent, but present live + } else if ((expected.entrypoint || null) !== liveEntrypoint) { + return false; + } + + // --- env --- + const liveEnv = parseEnvString(liveConfig?.env); + if (expectedDeletes.has('env')) { + if (Object.keys(liveEnv).length > 0) return false; // expected none, but some live + } else { + const expectedEnv = parseEnvString(expected.env); + if (!shallowEqualMap(expectedEnv, liveEnv)) return false; + } + + return true; +} + +/** + * Locate a container in a cluster-resources snapshot by VMID. + * @param {Array|null} snapshot - Result of ProxmoxApi.clusterResources('lxc') + * @param {number} vmid + * @returns {object|null} The matching resource entry or null + */ +function findInSnapshot(snapshot, vmid) { + if (!Array.isArray(snapshot) || vmid == null) return null; + return snapshot.find((r) => Number(r.vmid) === Number(vmid)) || null; +} + +/** + * Resolve status from already-gathered facts. Pure function — no I/O — so it is + * trivial to unit test and is the single source of the decision tree. + * + * @param {object} facts + * @param {boolean} facts.proxmoxReachable - Whether Proxmox was queried OK. + * @param {boolean} facts.inProxmox - Whether the LXC exists in Proxmox. + * @param {boolean} facts.proxmoxRunning - Whether the LXC is running. + * @param {boolean} facts.inSync - Whether the live config matches expectation. + * @param {boolean} facts.hasVmid - Whether the container has a Proxmox VMID. + * @param {boolean} facts.creating - Active create job present. + * @param {boolean} facts.restarting - Active reconfigure job present. + * @param {boolean} facts.createFailed - Latest create job returned failure. + * @param {boolean} facts.hasCreateJob - A create job exists at all. + * @returns {string} STATUS value + */ +function decideStatus(facts) { + if (facts.inProxmox) { + if (facts.restarting) return STATUS.RESTARTING; + if (facts.inSync === false) return STATUS.OUT_OF_SYNC; + return facts.proxmoxRunning ? STATUS.RUNNING : STATUS.OFFLINE; + } + + if (facts.creating) return STATUS.CREATING; + if (facts.restarting) return STATUS.RESTARTING; + + // We had a VMID + creds but couldn't reach Proxmox: don't guess missing. + if (facts.hasVmid && !facts.proxmoxReachable) return STATUS.UNKNOWN; + + if (facts.createFailed) return STATUS.FAILED; + return STATUS.MISSING; +} + +/** + * Compute the live status of a single container. + * + * @param {object} params + * @param {object} params.container - Container instance (with `node` association). + * @param {object} params.Job - Job model (from models). + * @param {object} [params.api] - Optional pre-authenticated ProxmoxApi client for + * the container's node (lets callers reuse one client across many containers). + * @param {{ data: Array, ok: boolean }} [params.snapshot] - Optional + * pre-fetched clusterResources('lxc') snapshot for the node. `ok` indicates the + * query succeeded; `data` is the resource list (used for batching the index). + * @returns {Promise} One of the STATUS values. + */ +async function computeContainerStatus({ container, Job, api, snapshot }) { + const node = container.node; + const hasCreds = nodeHasCreds(node); + const hasVmid = container.containerId != null; + + let client = api || null; + async function getClient() { + if (!client) client = await node.api(); + return client; + } + + // --- Determine Proxmox presence / run state --- + let proxmoxReachable = false; + let inProxmox = false; + let proxmoxRunning = false; + + if (hasVmid && hasCreds) { + try { + let resources; + if (snapshot) { + proxmoxReachable = !!snapshot.ok; + resources = snapshot.ok ? snapshot.data : null; + } else { + const c = await getClient(); + resources = await c.clusterResources('lxc'); + proxmoxReachable = true; + } + if (proxmoxReachable) { + const resource = findInSnapshot(resources, container.containerId); + if (resource) { + inProxmox = true; + proxmoxRunning = resource.status === 'running'; + } + } + } catch (err) { + proxmoxReachable = false; + } + } + + // --- Jobs --- + const reconfigureJob = await findLatestReconfigureJob(container, Job); + const restarting = isActiveJob(reconfigureJob); + + // Drift detection (only meaningful if it exists and isn't already restarting). + let inSync = null; + if (inProxmox && !restarting && hasCreds) { + try { + const c = await getClient(); + const liveConfig = await c.lxcConfig(node.name, container.containerId); + inSync = configMatches(container, liveConfig); + } catch (err) { + inSync = null; // can't assert drift + } + } + + // Only look up the create job when we actually need it (container not running + // in Proxmox), to avoid an extra query on the happy path. + let creating = false; + let createFailed = false; + let hasCreateJob = false; + if (!inProxmox) { + const createJob = await findLatestCreateJob(container, Job); + hasCreateJob = !!createJob; + creating = isActiveJob(createJob); + createFailed = !!createJob && createJob.status === 'failure'; + } + + return decideStatus({ + proxmoxReachable, + inProxmox, + proxmoxRunning, + inSync, + hasVmid, + creating, + restarting, + createFailed, + hasCreateJob, + }); +} + +/** + * Compute live statuses for many containers efficiently. + * + * Groups containers by node so each node's Proxmox `clusterResources('lxc')` + * snapshot is fetched exactly once and a single authenticated client is reused + * for that node's containers (including per-container config reads). This keeps + * total end-user latency low for the list page versus N independent calls. + * + * @param {Array} containers - Container instances (each with `node`). + * @param {object} Job - Job model. + * @returns {Promise>} Map of container.id -> STATUS value. + */ +async function computeContainerStatuses(containers, Job) { + const result = new Map(); + + // Group by node id (containers without a node still get resolved, just without + // any Proxmox facts). + const byNode = new Map(); + for (const container of containers) { + const key = container.node ? container.node.id : `__no_node_${container.id}`; + if (!byNode.has(key)) byNode.set(key, []); + byNode.get(key).push(container); + } + + await Promise.all( + Array.from(byNode.values()).map(async (group) => { + const node = group[0].node; + let api = null; + let snapshot = { ok: false, data: null }; + + if (nodeHasCreds(node)) { + try { + api = await node.api(); + const data = await api.clusterResources('lxc'); + snapshot = { ok: true, data }; + } catch (err) { + snapshot = { ok: false, data: null }; + } + } + + // Resolve each container in the group sequentially per node (shares the + // single authenticated client); different nodes run in parallel. + for (const container of group) { + // eslint-disable-next-line no-await-in-loop + const status = await computeContainerStatus({ container, Job, api, snapshot }); + result.set(container.id, status); + } + }), + ); + + return result; +} + +module.exports = { + STATUS, + STATUS_VALUES, + computeContainerStatus, + computeContainerStatuses, + decideStatus, + // exported for reuse / testing + configMatches, + parseEnvString, + findInSnapshot, + findLatestCreateJob, + findLatestReconfigureJob, +}; From 31adb4150f9d5ea4c0197d8ba9de450c86eccea9 Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 08:21:59 -0400 Subject: [PATCH 2/9] Drop dedicated /containers/:id/status endpoint Live status is already embedded in the existing list/show/create container payloads, so the dedicated GET /containers/:id/status endpoint is redundant. Per-row polling + hydration on the index page is not needed at this time, so revert the ContainerStatusBadge to a simple presentational badge that renders the live status from the list response. Removes the endpoint, the client query/key/result type, the OpenAPI path, and the README endpoint section (the status-value reference is retained as a field description). The ContainerStatus enum and the live resolver remain. --- create-a-container/README.md | 30 ++++++++-------- create-a-container/client/src/lib/queries.ts | 5 --- create-a-container/client/src/lib/types.ts | 6 +--- .../pages/containers/ContainersListPage.tsx | 36 +++---------------- .../20260615000000-remove-container-status.js | 4 +-- create-a-container/openapi.v1.yaml | 20 ----------- .../routers/api/v1/containers.js | 24 ------------- 7 files changed, 21 insertions(+), 104 deletions(-) diff --git a/create-a-container/README.md b/create-a-container/README.md index 31fb3d2f..6d6fb80c 100644 --- a/create-a-container/README.md +++ b/create-a-container/README.md @@ -230,22 +230,20 @@ Delete a container from both Proxmox and the database - `403` - User doesn't own the container - `500` - Proxmox API deletion failed or node not configured -#### `GET /api/v1/sites/:siteId/containers/:id/status` (Auth Required) -Return the **live** status of a container, computed on demand rather than read -from a stored column. The status is resolved by combining the container's -presence/run-state in Proxmox, any active create/reconfigure jobs, and whether -the live LXC config matches what the API server expects. -- **Returns**: `{ data: { status } }` where `status` is one of: - - `running` — online in Proxmox - - `offline` — exists in Proxmox but stopped - - `creating` — no Proxmox VM yet, active create job - - `restarting` — active reconfigure job - - `failed` — no Proxmox VM, last create job failed - - `missing` — no Proxmox VM, create succeeded or no create job found - - `out-of-sync` — exists in Proxmox but its config differs from expectation - - `unknown` — Proxmox unreachable / node has no API credentials -- **Note**: The same `status` value is also embedded on each container returned - by the list and show endpoints, so existing consumers remain non-breaking. +#### Container status (`status` field) +Every container returned by the list and show endpoints includes a **live** +`status` field, computed on demand rather than read from a stored column. It is +resolved by combining the container's presence/run-state in Proxmox, any active +create/reconfigure jobs, and whether the live LXC config matches what the API +server expects. Possible values: +- `running` — online in Proxmox +- `offline` — exists in Proxmox but stopped +- `creating` — no Proxmox VM yet, active create job +- `restarting` — active reconfigure job +- `failed` — no Proxmox VM, last create job failed +- `missing` — no Proxmox VM, create succeeded or no create job found +- `out-of-sync` — exists in Proxmox but its config differs from expectation +- `unknown` — Proxmox unreachable / node has no API credentials #### `GET /status/:jobId` (Auth Required) View container creation progress page diff --git a/create-a-container/client/src/lib/queries.ts b/create-a-container/client/src/lib/queries.ts index 546db6be..b261f2ec 100644 --- a/create-a-container/client/src/lib/queries.ts +++ b/create-a-container/client/src/lib/queries.ts @@ -8,7 +8,6 @@ import type { Container, ContainerMetadata, ContainerNewBootstrap, - ContainerStatusResult, EffectiveResources, ExternalDomain, Group, @@ -30,8 +29,6 @@ export const keys = { containers: (siteId: number | string) => ['sites', String(siteId), 'containers'] as const, container: (siteId: number | string, id: number | string) => ['sites', String(siteId), 'containers', String(id)] as const, - containerStatus: (siteId: number | string, id: number | string) => - ['sites', String(siteId), 'containers', String(id), 'status'] as const, containerBootstrap: (siteId: number | string) => ['sites', String(siteId), 'containers', 'new'] as const, containerMetadata: (image: string) => ['container-metadata', image] as const, @@ -67,8 +64,6 @@ export const queries = { api.get(`/api/v1/sites/${siteId}/containers`), getContainer: (siteId: number | string, id: number | string) => api.get(`/api/v1/sites/${siteId}/containers/${id}`), - getContainerStatus: (siteId: number | string, id: number | string) => - api.get(`/api/v1/sites/${siteId}/containers/${id}/status`), containerBootstrap: (siteId: number | string) => api.get(`/api/v1/sites/${siteId}/containers/new`), containerMetadata: (siteId: number | string, image: string) => diff --git a/create-a-container/client/src/lib/types.ts b/create-a-container/client/src/lib/types.ts index a32fd6cd..e4e077d3 100644 --- a/create-a-container/client/src/lib/types.ts +++ b/create-a-container/client/src/lib/types.ts @@ -92,7 +92,7 @@ export interface Container { /** * Live container status resolved from Proxmox + job state + config drift. - * Returned by GET /sites/:id/containers/:id/status and embedded on Container. + * Embedded on each Container returned by the list/show/create endpoints. */ export type ContainerStatus = | 'running' @@ -104,10 +104,6 @@ export type ContainerStatus = | 'out-of-sync' | 'unknown'; -export interface ContainerStatusResult { - status: ContainerStatus; -} - export interface ContainerCreateResult { containerId: number; jobId: number; diff --git a/create-a-container/client/src/pages/containers/ContainersListPage.tsx b/create-a-container/client/src/pages/containers/ContainersListPage.tsx index 5f899d29..834bce6a 100644 --- a/create-a-container/client/src/pages/containers/ContainersListPage.tsx +++ b/create-a-container/client/src/pages/containers/ContainersListPage.tsx @@ -71,36 +71,8 @@ const STATUS_LABELS: Record = { unknown: 'Unknown', }; -// Statuses that are transitional and worth polling for. -const TRANSITIONAL_STATUSES: ReadonlySet = new Set([ - 'creating', - 'restarting', -]); - -/** - * Live status badge. Per issue #350 each row queries the dedicated - * /containers/:id/status endpoint. The status already returned by the list - * endpoint seeds initialData so the badge paints instantly, then this query - * keeps it fresh (polling while the container is in a transitional state). - */ -function ContainerStatusBadge({ - siteId, - container, -}: { - siteId: string | undefined; - container: Container; -}) { - const { data } = useQuery({ - queryKey: keys.containerStatus(siteId!, container.id), - queryFn: () => queries.getContainerStatus(siteId!, container.id), - enabled: !!siteId, - initialData: { status: container.status }, - refetchInterval: (query) => { - const status = query.state.data?.status; - return status && TRANSITIONAL_STATUSES.has(status) ? 4000 : false; - }, - }); - const status = data?.status ?? container.status; +/** Status badge. The status is the live value embedded in the list response. */ +function StatusBadge({ status }: { status: ContainerStatus }) { return {STATUS_LABELS[status] ?? status}; } @@ -378,7 +350,7 @@ export function ContainersListPage() { {c.hostname} - +
@@ -422,7 +394,7 @@ export function ContainersListPage() { {c.hostname} - + diff --git a/create-a-container/migrations/20260615000000-remove-container-status.js b/create-a-container/migrations/20260615000000-remove-container-status.js index cdcdc72c..39d7b50e 100644 --- a/create-a-container/migrations/20260615000000-remove-container-status.js +++ b/create-a-container/migrations/20260615000000-remove-container-status.js @@ -5,8 +5,8 @@ * Remove the static `status` column from Containers. * * Container status is now computed live from Proxmox + job state + config drift - * (see utils/container-status.js and GET /sites/:id/containers/:id/status), so the - * persisted column is no longer a source of truth and is dropped. + * (see utils/container-status.js) and embedded on the container API payloads, so + * the persisted column is no longer a source of truth and is dropped. * * The down migration restores the column (defaulting to 'running') for rollback, * but the historical per-container value cannot be recovered. diff --git a/create-a-container/openapi.v1.yaml b/create-a-container/openapi.v1.yaml index ec476381..540fb675 100644 --- a/create-a-container/openapi.v1.yaml +++ b/create-a-container/openapi.v1.yaml @@ -360,26 +360,6 @@ paths: get: { tags: [Containers], responses: { '200': { description: Container } } } put: { tags: [Containers], responses: { '200': { description: Updated, optional restart job } } } delete: { tags: [Containers], responses: { '200': { description: Deleted, with DNS cleanup warnings } } } - /sites/{siteId}/containers/{id}/status: - get: - tags: [Containers] - summary: Live container status (computed from Proxmox + jobs + config drift) - parameters: - - { in: path, name: siteId, required: true, schema: { type: integer } } - - { in: path, name: id, required: true, schema: { type: integer } } - responses: - '200': - description: Resolved container status - content: - application/json: - schema: - type: object - properties: - data: - type: object - properties: - status: { $ref: '#/components/schemas/ContainerStatus' } - '404': { description: Container not found } /sites/{siteId}/nodes: parameters: [{ in: path, name: siteId, required: true, schema: { type: integer } }] diff --git a/create-a-container/routers/api/v1/containers.js b/create-a-container/routers/api/v1/containers.js index 48a9c3bc..84c6003d 100644 --- a/create-a-container/routers/api/v1/containers.js +++ b/create-a-container/routers/api/v1/containers.js @@ -223,30 +223,6 @@ router.get( }), ); -// GET /containers/:id/status — live status for a single container -router.get( - '/:id/status', - asyncHandler(async (req, res) => { - const site = await loadSite(req); - const containerId = parseInt(req.params.id, 10); - if (!Number.isInteger(containerId) || containerId <= 0) { - throw new ApiError(404, 'not_found', 'Container not found'); - } - const c = await Container.findOne({ - where: { id: containerId, username: req.session.user }, - include: [ - { association: 'node' }, - { association: 'creationJob' }, - ], - }); - if (!c || !c.node || c.node.siteId !== site.id) { - throw new ApiError(404, 'not_found', 'Container not found'); - } - const status = await computeContainerStatus({ container: c, Job }); - return ok(res, { status }); - }), -); - // GET /containers/:id router.get( '/:id', From 73daf6c44a79068650a1ab039d293b375c2ddd53 Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 08:34:02 -0400 Subject: [PATCH 3/9] Address PR review feedback - out-of-sync is a warning, not a danger, badge variant - templates.js routes containers by ipv4Address only (a VMID isn't required to route) --- .../src/pages/containers/ContainersListPage.tsx | 2 +- create-a-container/routers/templates.js | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/create-a-container/client/src/pages/containers/ContainersListPage.tsx b/create-a-container/client/src/pages/containers/ContainersListPage.tsx index 834bce6a..20f2de07 100644 --- a/create-a-container/client/src/pages/containers/ContainersListPage.tsx +++ b/create-a-container/client/src/pages/containers/ContainersListPage.tsx @@ -47,9 +47,9 @@ function statusVariant( return 'success'; case 'creating': case 'restarting': + case 'out-of-sync': return 'warning'; case 'failed': - case 'out-of-sync': return 'danger'; case 'offline': case 'missing': diff --git a/create-a-container/routers/templates.js b/create-a-container/routers/templates.js index 4969dde5..28dab30a 100644 --- a/create-a-container/routers/templates.js +++ b/create-a-container/routers/templates.js @@ -5,12 +5,10 @@ const { requireLocalhostOrAdmin } = require('../middlewares'); const router = express.Router(); -// A container is eligible for routing/DNS config once it has been provisioned, -// i.e. it has a Proxmox VMID and an assigned IPv4 address. The old code filtered -// on the (now-removed) static status column; presence of an IP/VMID is the stable, -// Proxmox-independent proxy for "this container is live enough to route to". -const PROVISIONED_CONTAINER_WHERE = { - containerId: { [Op.ne]: null }, +// A container is routable once it has an assigned IPv4 address. The old code +// filtered on the (now-removed) static status column; presence of an IP is the +// stable, Proxmox-independent requirement for "this container can be routed to". +const ROUTABLE_CONTAINER_WHERE = { ipv4Address: { [Op.ne]: null }, }; @@ -22,7 +20,7 @@ async function loadDnsmasqSite(siteId) { include: [{ model: Container, as: 'containers', - where: PROVISIONED_CONTAINER_WHERE, + where: ROUTABLE_CONTAINER_WHERE, required: false, attributes: ['macAddress', 'ipv4Address', 'hostname'], }], @@ -50,7 +48,7 @@ router.get('/sites/:siteId/nginx', requireLocalhostOrAdmin, async (req, res) => include: [{ model: Container, as: 'containers', - where: PROVISIONED_CONTAINER_WHERE, + where: ROUTABLE_CONTAINER_WHERE, required: false, include: [{ model: Service, From 2f08810fa87a7f790d40492afcbef1c1ea75ab59 Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 08:45:08 -0400 Subject: [PATCH 4/9] Inline routable-container where clause in templates.js --- create-a-container/routers/templates.js | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/create-a-container/routers/templates.js b/create-a-container/routers/templates.js index 28dab30a..fb0bda71 100644 --- a/create-a-container/routers/templates.js +++ b/create-a-container/routers/templates.js @@ -5,13 +5,6 @@ const { requireLocalhostOrAdmin } = require('../middlewares'); const router = express.Router(); -// A container is routable once it has an assigned IPv4 address. The old code -// filtered on the (now-removed) static status column; presence of an IP is the -// stable, Proxmox-independent requirement for "this container can be routed to". -const ROUTABLE_CONTAINER_WHERE = { - ipv4Address: { [Op.ne]: null }, -}; - async function loadDnsmasqSite(siteId) { return Site.findByPk(siteId, { include: [{ @@ -20,7 +13,8 @@ async function loadDnsmasqSite(siteId) { include: [{ model: Container, as: 'containers', - where: ROUTABLE_CONTAINER_WHERE, + // Only containers with an assigned IPv4 are routable. + where: { ipv4Address: { [Op.ne]: null } }, required: false, attributes: ['macAddress', 'ipv4Address', 'hostname'], }], @@ -48,7 +42,8 @@ router.get('/sites/:siteId/nginx', requireLocalhostOrAdmin, async (req, res) => include: [{ model: Container, as: 'containers', - where: ROUTABLE_CONTAINER_WHERE, + // Only containers with an assigned IPv4 are routable. + where: { ipv4Address: { [Op.ne]: null } }, required: false, include: [{ model: Service, From be83d58b5a8cb381552f587cb6a18766f5f62cc2 Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 08:57:30 -0400 Subject: [PATCH 5/9] cleanup --- create-a-container/bin/create-container.js | 3 --- create-a-container/bin/reconfigure-container.js | 4 +--- create-a-container/routers/api/v1/containers.js | 2 -- create-a-container/routers/templates.js | 2 -- 4 files changed, 1 insertion(+), 10 deletions(-) diff --git a/create-a-container/bin/create-container.js b/create-a-container/bin/create-container.js index c7672e63..af221df9 100755 --- a/create-a-container/bin/create-container.js +++ b/create-a-container/bin/create-container.js @@ -481,9 +481,6 @@ async function main() { console.error('API Error Details:', JSON.stringify(err.response.data, null, 2)); } - // The container status is no longer persisted. Failure is recorded by the - // job-runner marking this job 'failure'; the live status resolver maps a - // missing-in-Proxmox container whose last create job failed to 'failed'. process.exit(1); } } diff --git a/create-a-container/bin/reconfigure-container.js b/create-a-container/bin/reconfigure-container.js index 6e31a248..1f702ce9 100644 --- a/create-a-container/bin/reconfigure-container.js +++ b/create-a-container/bin/reconfigure-container.js @@ -160,7 +160,7 @@ async function main() { throw new Error('Could not get IP address from Proxmox interfaces API'); } - // Update container record with MAC/IP (status is no longer persisted) + // Update container record with MAC/IP await container.update({ macAddress, ipv4Address @@ -181,8 +181,6 @@ async function main() { console.error('API Error Details:', JSON.stringify(err.response.data, null, 2)); } - // Container status is no longer persisted. The job-runner records this job as - // 'failure'; live status is derived from Proxmox + job state on read. process.exit(1); } } diff --git a/create-a-container/routers/api/v1/containers.js b/create-a-container/routers/api/v1/containers.js index 84c6003d..73a698b2 100644 --- a/create-a-container/routers/api/v1/containers.js +++ b/create-a-container/routers/api/v1/containers.js @@ -431,8 +431,6 @@ router.put( const dnsWarnings = []; await sequelize.transaction(async (t) => { if (envChanged || entrypointChanged) { - // Persist the new desired config only. The "restarting" state is no - // longer stored — it is derived from the active reconfigure job below. await container.update( { environmentVars: envVarsJson, diff --git a/create-a-container/routers/templates.js b/create-a-container/routers/templates.js index fb0bda71..938ce756 100644 --- a/create-a-container/routers/templates.js +++ b/create-a-container/routers/templates.js @@ -13,7 +13,6 @@ async function loadDnsmasqSite(siteId) { include: [{ model: Container, as: 'containers', - // Only containers with an assigned IPv4 are routable. where: { ipv4Address: { [Op.ne]: null } }, required: false, attributes: ['macAddress', 'ipv4Address', 'hostname'], @@ -42,7 +41,6 @@ router.get('/sites/:siteId/nginx', requireLocalhostOrAdmin, async (req, res) => include: [{ model: Container, as: 'containers', - // Only containers with an assigned IPv4 are routable. where: { ipv4Address: { [Op.ne]: null } }, required: false, include: [{ From 0de50b2dd64e049fff21142daf3f5a6167a74a9f Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 09:22:58 -0400 Subject: [PATCH 6/9] Address PR review feedback - container-status.js: match --container-id as a whole argument when looking up create/reconfigure jobs, so container 12 no longer matches container 123's jobs (which could misclassify status). LIKE narrows at the DB level; a regex confirms the id is terminated by a space or end-of-string. - resource-requests.js: the applyResourceToExistingContainers query filtered on the now-removed Containers.status column ("Unknown column 'status'" after the migration). Use containerId != null (a VMID is required to reconfigure anyway). - README: note the create endpoint also returns the live status field. --- create-a-container/README.md | 13 +++--- .../routers/api/v1/resource-requests.js | 7 +++- create-a-container/utils/container-status.js | 42 +++++++++++++++---- 3 files changed, 47 insertions(+), 15 deletions(-) diff --git a/create-a-container/README.md b/create-a-container/README.md index 6d6fb80c..3c5f596b 100644 --- a/create-a-container/README.md +++ b/create-a-container/README.md @@ -231,11 +231,11 @@ Delete a container from both Proxmox and the database - `500` - Proxmox API deletion failed or node not configured #### Container status (`status` field) -Every container returned by the list and show endpoints includes a **live** -`status` field, computed on demand rather than read from a stored column. It is -resolved by combining the container's presence/run-state in Proxmox, any active -create/reconfigure jobs, and whether the live LXC config matches what the API -server expects. Possible values: +Every container returned by the list, show, and create endpoints includes a +**live** `status` field, computed on demand rather than read from a stored +column. It is resolved by combining the container's presence/run-state in +Proxmox, any active create/reconfigure jobs, and whether the live LXC config +matches what the API server expects. Possible values: - `running` — online in Proxmox - `offline` — exists in Proxmox but stopped - `creating` — no Proxmox VM yet, active create job @@ -245,6 +245,9 @@ server expects. Possible values: - `out-of-sync` — exists in Proxmox but its config differs from expectation - `unknown` — Proxmox unreachable / node has no API credentials +The create endpoint (`POST /containers`) returns `creating` immediately, since a +create job is enqueued and there is no Proxmox VM yet. + #### `GET /status/:jobId` (Auth Required) View container creation progress page diff --git a/create-a-container/routers/api/v1/resource-requests.js b/create-a-container/routers/api/v1/resource-requests.js index ce047223..5997faef 100644 --- a/create-a-container/routers/api/v1/resource-requests.js +++ b/create-a-container/routers/api/v1/resource-requests.js @@ -217,12 +217,15 @@ router.put( ); /** - * Apply a resource change to existing running containers matching the identity. + * Apply a resource change to existing provisioned containers matching the identity. * This creates a reconfigure job for each matching container. */ async function applyResourceToExistingContainers(siteId, hostname, username, resourceType, value) { const containers = await Container.findAll({ - where: { siteId, hostname, username, status: 'running' }, + // Only containers that exist in Proxmox (have a VMID) can be reconfigured. + // The status column was removed; reconfigure-container.js exits early without + // a containerId, so this is the correct provisioned predicate. + where: { siteId, hostname, username, containerId: { [Op.ne]: null } }, }); if (containers.length === 0) return; diff --git a/create-a-container/utils/container-status.js b/create-a-container/utils/container-status.js index 735ec22b..ba81a0bb 100644 --- a/create-a-container/utils/container-status.js +++ b/create-a-container/utils/container-status.js @@ -49,6 +49,38 @@ function nodeHasCreds(node) { return !!(node && node.apiUrl && node.tokenId && node.secret); } +/** + * Escape a string for safe use inside a RegExp. + * @param {string|number} s + * @returns {string} + */ +function escapeRegExp(s) { + return String(s).replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +/** + * Find the most recent job for a container whose command contains + * ` --container-id=` as a *whole* argument. + * + * A plain SQL LIKE on `--container-id=` would also match longer ids + * (id 12 matches `--container-id=123`), so we use LIKE only to narrow at the DB + * level (ordered most-recent first) and then confirm each candidate in JS with a + * regex requiring the id to be terminated by a space or end-of-string. + * + * @param {object} Job - Job model + * @param {string} cmdFragment - e.g. CREATE_CMD or RECONFIGURE_CMD + * @param {number} id - Container database id + * @returns {Promise} + */ +async function findLatestJobForContainer(Job, cmdFragment, id) { + const candidates = await Job.findAll({ + where: { command: { [Op.like]: `%${cmdFragment} --container-id=${id}%` } }, + order: [['createdAt', 'DESC']], + }); + const whole = new RegExp(`${escapeRegExp(cmdFragment)} --container-id=${escapeRegExp(id)}(?:\\s|$)`); + return candidates.find((job) => whole.test(job.command)) || null; +} + /** * Find the most recent create job for a container. * Prefers the explicit creationJobId FK; falls back to a command-string match. @@ -61,10 +93,7 @@ async function findLatestCreateJob(container, Job) { if (container.creationJobId) { return Job.findByPk(container.creationJobId); } - return Job.findOne({ - where: { command: { [Op.like]: `%${CREATE_CMD} --container-id=${container.id}%` } }, - order: [['createdAt', 'DESC']], - }); + return findLatestJobForContainer(Job, CREATE_CMD, container.id); } /** @@ -74,10 +103,7 @@ async function findLatestCreateJob(container, Job) { * @returns {Promise} */ async function findLatestReconfigureJob(container, Job) { - return Job.findOne({ - where: { command: { [Op.like]: `%${RECONFIGURE_CMD} --container-id=${container.id}%` } }, - order: [['createdAt', 'DESC']], - }); + return findLatestJobForContainer(Job, RECONFIGURE_CMD, container.id); } function isActiveJob(job) { From 1546105b2a79eba747812e4e4ea2724a562bf74a Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 09:36:08 -0400 Subject: [PATCH 7/9] Use exact DB match for job id lookup instead of regex The calling convention for create/reconfigure job commands is stable and deterministic, so match the command string exactly in the query rather than fetching LIKE candidates and regex-filtering in JS: - create: exact match on `node bin/create-container.js --container-id=` (create commands never carry trailing args). - reconfigure: exact match OR ` %` (exact base followed by a space), which covers the optional ` --=` flags while still preventing id 12 from matching id 123. --- create-a-container/utils/container-status.js | 66 ++++++++------------ 1 file changed, 27 insertions(+), 39 deletions(-) diff --git a/create-a-container/utils/container-status.js b/create-a-container/utils/container-status.js index ba81a0bb..ccfb31e4 100644 --- a/create-a-container/utils/container-status.js +++ b/create-a-container/utils/container-status.js @@ -38,10 +38,17 @@ const STATUS = Object.freeze({ const STATUS_VALUES = Object.freeze(Object.values(STATUS)); -// Job command fragments that identify create vs reconfigure (restart) jobs. -// Jobs are distinguished solely by their `command` string (no type column). -const CREATE_CMD = 'bin/create-container.js'; -const RECONFIGURE_CMD = 'bin/reconfigure-container.js'; +// Job command builders. Jobs are distinguished solely by their `command` string +// (no type column), and the commands are constructed deterministically by the +// container router / resource-requests, so we match them exactly here. +// create: node bin/create-container.js --container-id= +// reconfigure: node bin/reconfigure-container.js --container-id=[ --=...] +function createCommand(id) { + return `node bin/create-container.js --container-id=${id}`; +} +function reconfigureCommand(id) { + return `node bin/reconfigure-container.js --container-id=${id}`; +} const ACTIVE_JOB_STATUSES = ['pending', 'running']; @@ -49,41 +56,10 @@ function nodeHasCreds(node) { return !!(node && node.apiUrl && node.tokenId && node.secret); } -/** - * Escape a string for safe use inside a RegExp. - * @param {string|number} s - * @returns {string} - */ -function escapeRegExp(s) { - return String(s).replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); -} - -/** - * Find the most recent job for a container whose command contains - * ` --container-id=` as a *whole* argument. - * - * A plain SQL LIKE on `--container-id=` would also match longer ids - * (id 12 matches `--container-id=123`), so we use LIKE only to narrow at the DB - * level (ordered most-recent first) and then confirm each candidate in JS with a - * regex requiring the id to be terminated by a space or end-of-string. - * - * @param {object} Job - Job model - * @param {string} cmdFragment - e.g. CREATE_CMD or RECONFIGURE_CMD - * @param {number} id - Container database id - * @returns {Promise} - */ -async function findLatestJobForContainer(Job, cmdFragment, id) { - const candidates = await Job.findAll({ - where: { command: { [Op.like]: `%${cmdFragment} --container-id=${id}%` } }, - order: [['createdAt', 'DESC']], - }); - const whole = new RegExp(`${escapeRegExp(cmdFragment)} --container-id=${escapeRegExp(id)}(?:\\s|$)`); - return candidates.find((job) => whole.test(job.command)) || null; -} - /** * Find the most recent create job for a container. - * Prefers the explicit creationJobId FK; falls back to a command-string match. + * Prefers the explicit creationJobId FK; falls back to an exact command match. + * Create commands never carry trailing arguments, so the match is exact. * @param {object} container - Container instance (with optional creationJob assoc) * @param {object} Job - Job model * @returns {Promise} @@ -93,17 +69,29 @@ async function findLatestCreateJob(container, Job) { if (container.creationJobId) { return Job.findByPk(container.creationJobId); } - return findLatestJobForContainer(Job, CREATE_CMD, container.id); + return Job.findOne({ + where: { command: createCommand(container.id) }, + order: [['createdAt', 'DESC']], + }); } /** * Find the most recent reconfigure (restart) job for a container, if any. + * A reconfigure command is either exactly ` --container-id=` or that + * followed by ` --=` flags, so we match the exact form OR the + * exact form followed by a space (the latter guards against id 12 matching 123). * @param {object} container - Container instance * @param {object} Job - Job model * @returns {Promise} */ async function findLatestReconfigureJob(container, Job) { - return findLatestJobForContainer(Job, RECONFIGURE_CMD, container.id); + const base = reconfigureCommand(container.id); + return Job.findOne({ + where: { + [Op.or]: [{ command: base }, { command: { [Op.like]: `${base} %` } }], + }, + order: [['createdAt', 'DESC']], + }); } function isActiveJob(job) { From 937624b6ab857d985b4a1ce161d743d814a973bd Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 10:22:16 -0400 Subject: [PATCH 8/9] Batch job lookups when resolving statuses for the index Resolving the container list status was O(N) DB queries because every container ran its own create/reconfigure job lookup (up to 2 queries each). Add prefetchLatestJobs() which fetches the latest create + reconfigure job for the whole set of containers in a single query (bounded WHERE clauses, independent of N), buckets them per container, and feeds the result into computeContainerStatus via an optional `jobs` param. computeContainerStatuses now issues one job query total instead of ~2N. The single-container show path is unchanged (still queries per container when no prefetched jobs are passed). --- create-a-container/utils/container-status.js | 109 +++++++++++++++++-- 1 file changed, 99 insertions(+), 10 deletions(-) diff --git a/create-a-container/utils/container-status.js b/create-a-container/utils/container-status.js index ccfb31e4..2c7189ce 100644 --- a/create-a-container/utils/container-status.js +++ b/create-a-container/utils/container-status.js @@ -43,11 +43,28 @@ const STATUS_VALUES = Object.freeze(Object.values(STATUS)); // container router / resource-requests, so we match them exactly here. // create: node bin/create-container.js --container-id= // reconfigure: node bin/reconfigure-container.js --container-id=[ --=...] +const CREATE_PREFIX = 'node bin/create-container.js --container-id='; +const RECONFIGURE_PREFIX = 'node bin/reconfigure-container.js --container-id='; + function createCommand(id) { - return `node bin/create-container.js --container-id=${id}`; + return `${CREATE_PREFIX}${id}`; } function reconfigureCommand(id) { - return `node bin/reconfigure-container.js --container-id=${id}`; + return `${RECONFIGURE_PREFIX}${id}`; +} + +// Extract the container id from a create/reconfigure job command, or null if the +// command isn't one of ours. The id is the run of digits immediately after a +// known prefix, terminated by end-of-string or a space (so 12 != 123). +const COMMAND_CONTAINER_ID_RE = new RegExp( + `^node bin/(?:create|reconfigure)-container\\.js --container-id=(\\d+)(?: |$)`, +); +function containerIdFromCommand(command) { + const m = COMMAND_CONTAINER_ID_RE.exec(command || ''); + return m ? parseInt(m[1], 10) : null; +} +function isCreateCommand(command) { + return typeof command === 'string' && command.startsWith(CREATE_PREFIX); } const ACTIVE_JOB_STATUSES = ['pending', 'running']; @@ -94,6 +111,60 @@ async function findLatestReconfigureJob(container, Job) { }); } +/** + * Prefetch the latest create and reconfigure job for many containers in a single + * DB query, so resolving N containers' statuses is O(1) queries instead of O(N). + * + * Jobs are matched by their deterministic command strings (exact create command, + * and reconfigure command optionally followed by resource flags) for the given + * container ids, fetched newest-first, then bucketed per container keeping the + * first (latest) of each kind. + * + * @param {Array} containers - Container instances. + * @param {object} Job - Job model. + * @returns {Promise>} + */ +async function prefetchLatestJobs(containers, Job) { + const byContainer = new Map(); + const ids = []; + for (const c of containers) { + byContainer.set(c.id, { createJob: null, reconfigureJob: null }); + ids.push(c.id); + } + if (ids.length === 0) return byContainer; + + // One query for all relevant jobs, with a bounded number of WHERE clauses + // (independent of N): + // - exact create commands for these ids + // - exact (arg-less) reconfigure commands for these ids + // - any reconfigure command carrying trailing flags (prefix LIKE) + // The prefix LIKE may match reconfigure jobs for containers not on this page; + // those are discarded during bucketing below (byContainer lookup). + const orClauses = [ + { command: { [Op.in]: ids.map(createCommand) } }, + { command: { [Op.in]: ids.map(reconfigureCommand) } }, + { command: { [Op.like]: `${RECONFIGURE_PREFIX}%` } }, + ]; + const jobs = await Job.findAll({ + where: { [Op.or]: orClauses }, + order: [['createdAt', 'DESC']], + }); + + // Bucket newest-first; the first job seen for a (container, kind) is the latest. + for (const job of jobs) { + const id = containerIdFromCommand(job.command); + if (id == null) continue; + const entry = byContainer.get(id); + if (!entry) continue; + if (isCreateCommand(job.command)) { + if (!entry.createJob) entry.createJob = job; + } else if (!entry.reconfigureJob) { + entry.reconfigureJob = job; + } + } + return byContainer; +} + function isActiveJob(job) { return !!job && ACTIVE_JOB_STATUSES.includes(job.status); } @@ -219,9 +290,13 @@ function decideStatus(facts) { * @param {{ data: Array, ok: boolean }} [params.snapshot] - Optional * pre-fetched clusterResources('lxc') snapshot for the node. `ok` indicates the * query succeeded; `data` is the resource list (used for batching the index). + * @param {{ createJob: object|null, reconfigureJob: object|null }} [params.jobs] - + * Optional prefetched latest create/reconfigure jobs for this container (see + * prefetchLatestJobs). When provided, the per-container job DB lookups are + * skipped, making batch resolution O(1) queries instead of O(N). * @returns {Promise} One of the STATUS values. */ -async function computeContainerStatus({ container, Job, api, snapshot }) { +async function computeContainerStatus({ container, Job, api, snapshot, jobs }) { const node = container.node; const hasCreds = nodeHasCreds(node); const hasVmid = container.containerId != null; @@ -261,7 +336,10 @@ async function computeContainerStatus({ container, Job, api, snapshot }) { } // --- Jobs --- - const reconfigureJob = await findLatestReconfigureJob(container, Job); + // Use prefetched jobs when supplied (batch path); otherwise query per container. + const reconfigureJob = jobs + ? jobs.reconfigureJob + : await findLatestReconfigureJob(container, Job); const restarting = isActiveJob(reconfigureJob); // Drift detection (only meaningful if it exists and isn't already restarting). @@ -282,7 +360,7 @@ async function computeContainerStatus({ container, Job, api, snapshot }) { let createFailed = false; let hasCreateJob = false; if (!inProxmox) { - const createJob = await findLatestCreateJob(container, Job); + const createJob = jobs ? jobs.createJob : await findLatestCreateJob(container, Job); hasCreateJob = !!createJob; creating = isActiveJob(createJob); createFailed = !!createJob && createJob.status === 'failure'; @@ -304,10 +382,12 @@ async function computeContainerStatus({ container, Job, api, snapshot }) { /** * Compute live statuses for many containers efficiently. * - * Groups containers by node so each node's Proxmox `clusterResources('lxc')` - * snapshot is fetched exactly once and a single authenticated client is reused - * for that node's containers (including per-container config reads). This keeps - * total end-user latency low for the list page versus N independent calls. + * Two batching strategies keep this fast for the list page: + * - Jobs: a single DB query fetches the latest create/reconfigure job for every + * container up front (O(1) queries instead of O(N)). + * - Proxmox: containers are grouped by node so each node's + * `clusterResources('lxc')` snapshot is fetched exactly once and a single + * authenticated client is reused for that node's containers. * * @param {Array} containers - Container instances (each with `node`). * @param {object} Job - Job model. @@ -316,6 +396,9 @@ async function computeContainerStatus({ container, Job, api, snapshot }) { async function computeContainerStatuses(containers, Job) { const result = new Map(); + // One query for everyone's latest create/reconfigure jobs. + const jobsByContainer = await prefetchLatestJobs(containers, Job); + // Group by node id (containers without a node still get resolved, just without // any Proxmox facts). const byNode = new Map(); @@ -344,8 +427,12 @@ async function computeContainerStatuses(containers, Job) { // Resolve each container in the group sequentially per node (shares the // single authenticated client); different nodes run in parallel. for (const container of group) { + const jobs = jobsByContainer.get(container.id) || { + createJob: null, + reconfigureJob: null, + }; // eslint-disable-next-line no-await-in-loop - const status = await computeContainerStatus({ container, Job, api, snapshot }); + const status = await computeContainerStatus({ container, Job, api, snapshot, jobs }); result.set(container.id, status); } }), @@ -366,4 +453,6 @@ module.exports = { findInSnapshot, findLatestCreateJob, findLatestReconfigureJob, + prefetchLatestJobs, + containerIdFromCommand, }; From db202a0048c1e0b427126139553d9050e0e0f9b4 Mon Sep 17 00:00:00 2001 From: Robert Gingras Date: Tue, 16 Jun 2026 10:48:10 -0400 Subject: [PATCH 9/9] Simplify live status to Proxmox run-state + create job (drop restarting, out-of-sync) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolving the container list was still slow because each container did per-item work: a command-string job lookup for 'restarting', and a Proxmox lxcConfig call for 'out-of-sync'. Both are inherently O(N) (Proxmox has no bulk config endpoint), which dominated index latency. Drop both statuses: - 'restarting': not used by known API clients (launchpad only checks == running), and reconfigure jobs have no FK so detecting it required command-string queries. - 'out-of-sync': required one lxcConfig call per in-Proxmox container; no bulk Proxmox config API exists to batch it. Status now derives only from: - Proxmox run-state, via one clusterResources('lxc') snapshot per node, and - the create job, resolved from the eager-loaded creationJob association (or the creationJobId FK) — no command-string queries. Result: the index makes one Proxmox call per node and zero per-container Proxmox or DB job queries (verified at 30 containers). Remaining statuses: running, offline, creating, failed, missing, unknown. PUT still enqueues a reconfigure job and returns its jobId; only the 'restarting' status label is removed. Updates the resolver, containers router (eager-load creationJob on the index), client types/badges, OpenAPI enum, and README. --- create-a-container/README.md | 10 +- create-a-container/client/src/lib/types.ts | 4 +- .../pages/containers/ContainersListPage.tsx | 4 - create-a-container/openapi.v1.yaml | 10 +- .../routers/api/v1/containers.js | 4 +- create-a-container/utils/container-status.js | 319 +++--------------- 6 files changed, 60 insertions(+), 291 deletions(-) diff --git a/create-a-container/README.md b/create-a-container/README.md index 3c5f596b..a9ba5832 100644 --- a/create-a-container/README.md +++ b/create-a-container/README.md @@ -233,16 +233,14 @@ Delete a container from both Proxmox and the database #### Container status (`status` field) Every container returned by the list, show, and create endpoints includes a **live** `status` field, computed on demand rather than read from a stored -column. It is resolved by combining the container's presence/run-state in -Proxmox, any active create/reconfigure jobs, and whether the live LXC config -matches what the API server expects. Possible values: +column. It is resolved by combining the container's run-state in Proxmox (from a +single per-node cluster snapshot) with the state of its create job. Possible +values: - `running` — online in Proxmox - `offline` — exists in Proxmox but stopped - `creating` — no Proxmox VM yet, active create job -- `restarting` — active reconfigure job -- `failed` — no Proxmox VM, last create job failed +- `failed` — no Proxmox VM, create job failed - `missing` — no Proxmox VM, create succeeded or no create job found -- `out-of-sync` — exists in Proxmox but its config differs from expectation - `unknown` — Proxmox unreachable / node has no API credentials The create endpoint (`POST /containers`) returns `creating` immediately, since a diff --git a/create-a-container/client/src/lib/types.ts b/create-a-container/client/src/lib/types.ts index e4e077d3..809db607 100644 --- a/create-a-container/client/src/lib/types.ts +++ b/create-a-container/client/src/lib/types.ts @@ -91,17 +91,15 @@ export interface Container { } /** - * Live container status resolved from Proxmox + job state + config drift. + * Live container status resolved from Proxmox run-state + create-job state. * Embedded on each Container returned by the list/show/create endpoints. */ export type ContainerStatus = | 'running' | 'offline' | 'creating' - | 'restarting' | 'failed' | 'missing' - | 'out-of-sync' | 'unknown'; export interface ContainerCreateResult { diff --git a/create-a-container/client/src/pages/containers/ContainersListPage.tsx b/create-a-container/client/src/pages/containers/ContainersListPage.tsx index 20f2de07..5539ebf6 100644 --- a/create-a-container/client/src/pages/containers/ContainersListPage.tsx +++ b/create-a-container/client/src/pages/containers/ContainersListPage.tsx @@ -46,8 +46,6 @@ function statusVariant( case 'running': return 'success'; case 'creating': - case 'restarting': - case 'out-of-sync': return 'warning'; case 'failed': return 'danger'; @@ -64,10 +62,8 @@ const STATUS_LABELS: Record = { running: 'Running', offline: 'Offline', creating: 'Creating', - restarting: 'Restarting', failed: 'Failed', missing: 'Missing', - 'out-of-sync': 'Out of sync', unknown: 'Unknown', }; diff --git a/create-a-container/openapi.v1.yaml b/create-a-container/openapi.v1.yaml index 540fb675..70e137e1 100644 --- a/create-a-container/openapi.v1.yaml +++ b/create-a-container/openapi.v1.yaml @@ -107,24 +107,20 @@ components: ContainerStatus: type: string description: >- - Live container status, resolved on read from Proxmox presence/run-state, - active jobs, and configuration drift. + Live container status, resolved on read from Proxmox run-state and the + create job. running = online in Proxmox; offline = exists in Proxmox but stopped; creating = no Proxmox VM yet, active create job; - restarting = active reconfigure job; - failed = no Proxmox VM, last create job failed; + failed = no Proxmox VM, create job failed; missing = no Proxmox VM, create succeeded or no create job; - out-of-sync = exists in Proxmox but config differs from expectation; unknown = Proxmox unreachable / no node credentials. enum: - running - offline - creating - - restarting - failed - missing - - out-of-sync - unknown Node: type: object diff --git a/create-a-container/routers/api/v1/containers.js b/create-a-container/routers/api/v1/containers.js index 73a698b2..8f6fbfe4 100644 --- a/create-a-container/routers/api/v1/containers.js +++ b/create-a-container/routers/api/v1/containers.js @@ -211,10 +211,12 @@ router.get( }, // Full node record (incl. credentials) is required to query live Proxmox status. { association: 'node' }, + // Eager-load the create job so status resolution needs no per-container query. + { association: 'creationJob' }, ], }); // Resolve live statuses for the whole page in one pass: one Proxmox snapshot - // per node (shared), rather than N independent round-trips from the browser. + // per node (shared), and no per-container DB queries (create job is loaded above). const statuses = await computeContainerStatuses(rows, Job); return ok( res, diff --git a/create-a-container/utils/container-status.js b/create-a-container/utils/container-status.js index 2c7189ce..0c94c9d5 100644 --- a/create-a-container/utils/container-status.js +++ b/create-a-container/utils/container-status.js @@ -5,68 +5,40 @@ * was only mutated by the create/reconfigure job scripts. That value drifts out * of reality whenever something changes a container directly in Proxmox (or when * a job dies without updating the row). This module computes the real status on - * demand by combining three sources of truth: + * demand by combining two sources of truth: * - * 1. Proxmox — does the LXC exist, and is it running or stopped? - * 2. Jobs — is there an active create/restart job, or did the last create - * job fail? - * 3. Config — does the live LXC config match what the API server expects? + * 1. Proxmox — does the LXC exist, and is it running or stopped? This comes + * from a single per-node `clusterResources('lxc')` snapshot. + * 2. Create job — for containers not yet in Proxmox: is the create job still + * active, did it fail, or is it gone? The create job is linked + * to the container by the `creationJobId` foreign key, so this + * is a cheap primary-key lookup (or free when eager-loaded). * - * Resolved statuses (a strict superset of the old running/offline values): - * running — exists in Proxmox and is online - * offline — exists in Proxmox but is stopped - * restarting — has an active (pending/running) reconfigure job - * creating — not in Proxmox, but has an active (pending/running) create job - * failed — not in Proxmox, last create job returned failure - * missing — not in Proxmox, create job succeeded or no create job found - * out-of-sync — exists in Proxmox but its config doesn't match expectation - * unknown — Proxmox could not be reached / node has no API credentials + * Resolved statuses: + * running — exists in Proxmox and is online + * offline — exists in Proxmox but is stopped + * creating — not in Proxmox, but has an active (pending/running) create job + * failed — not in Proxmox, create job returned failure + * missing — not in Proxmox, create job succeeded or no create job found + * unknown — Proxmox could not be reached / node has no API credentials + * + * Note: there is intentionally no per-container Proxmox config read here. Proxmox + * has no bulk config endpoint, so config-drift detection would be O(N) network + * round-trips for the list page; it was dropped in favour of keeping status + * resolution cheap (one Proxmox call per node, zero per container). */ -const { Op } = require('sequelize'); - const STATUS = Object.freeze({ RUNNING: 'running', OFFLINE: 'offline', CREATING: 'creating', - RESTARTING: 'restarting', FAILED: 'failed', MISSING: 'missing', - OUT_OF_SYNC: 'out-of-sync', UNKNOWN: 'unknown', }); const STATUS_VALUES = Object.freeze(Object.values(STATUS)); -// Job command builders. Jobs are distinguished solely by their `command` string -// (no type column), and the commands are constructed deterministically by the -// container router / resource-requests, so we match them exactly here. -// create: node bin/create-container.js --container-id= -// reconfigure: node bin/reconfigure-container.js --container-id=[ --=...] -const CREATE_PREFIX = 'node bin/create-container.js --container-id='; -const RECONFIGURE_PREFIX = 'node bin/reconfigure-container.js --container-id='; - -function createCommand(id) { - return `${CREATE_PREFIX}${id}`; -} -function reconfigureCommand(id) { - return `${RECONFIGURE_PREFIX}${id}`; -} - -// Extract the container id from a create/reconfigure job command, or null if the -// command isn't one of ours. The id is the run of digits immediately after a -// known prefix, terminated by end-of-string or a space (so 12 != 123). -const COMMAND_CONTAINER_ID_RE = new RegExp( - `^node bin/(?:create|reconfigure)-container\\.js --container-id=(\\d+)(?: |$)`, -); -function containerIdFromCommand(command) { - const m = COMMAND_CONTAINER_ID_RE.exec(command || ''); - return m ? parseInt(m[1], 10) : null; -} -function isCreateCommand(command) { - return typeof command === 'string' && command.startsWith(CREATE_PREFIX); -} - const ACTIVE_JOB_STATUSES = ['pending', 'running']; function nodeHasCreds(node) { @@ -74,167 +46,24 @@ function nodeHasCreds(node) { } /** - * Find the most recent create job for a container. - * Prefers the explicit creationJobId FK; falls back to an exact command match. - * Create commands never carry trailing arguments, so the match is exact. - * @param {object} container - Container instance (with optional creationJob assoc) + * Resolve the create job for a container via its `creationJobId` foreign key. + * Prefers an already-loaded `creationJob` association (zero queries); otherwise + * does a single primary-key lookup. Returns null if the container has no linked + * create job. + * @param {object} container - Container instance (ideally with creationJob assoc) * @param {object} Job - Job model * @returns {Promise} */ -async function findLatestCreateJob(container, Job) { +async function findCreateJob(container, Job) { if (container.creationJob) return container.creationJob; - if (container.creationJobId) { - return Job.findByPk(container.creationJobId); - } - return Job.findOne({ - where: { command: createCommand(container.id) }, - order: [['createdAt', 'DESC']], - }); -} - -/** - * Find the most recent reconfigure (restart) job for a container, if any. - * A reconfigure command is either exactly ` --container-id=` or that - * followed by ` --=` flags, so we match the exact form OR the - * exact form followed by a space (the latter guards against id 12 matching 123). - * @param {object} container - Container instance - * @param {object} Job - Job model - * @returns {Promise} - */ -async function findLatestReconfigureJob(container, Job) { - const base = reconfigureCommand(container.id); - return Job.findOne({ - where: { - [Op.or]: [{ command: base }, { command: { [Op.like]: `${base} %` } }], - }, - order: [['createdAt', 'DESC']], - }); -} - -/** - * Prefetch the latest create and reconfigure job for many containers in a single - * DB query, so resolving N containers' statuses is O(1) queries instead of O(N). - * - * Jobs are matched by their deterministic command strings (exact create command, - * and reconfigure command optionally followed by resource flags) for the given - * container ids, fetched newest-first, then bucketed per container keeping the - * first (latest) of each kind. - * - * @param {Array} containers - Container instances. - * @param {object} Job - Job model. - * @returns {Promise>} - */ -async function prefetchLatestJobs(containers, Job) { - const byContainer = new Map(); - const ids = []; - for (const c of containers) { - byContainer.set(c.id, { createJob: null, reconfigureJob: null }); - ids.push(c.id); - } - if (ids.length === 0) return byContainer; - - // One query for all relevant jobs, with a bounded number of WHERE clauses - // (independent of N): - // - exact create commands for these ids - // - exact (arg-less) reconfigure commands for these ids - // - any reconfigure command carrying trailing flags (prefix LIKE) - // The prefix LIKE may match reconfigure jobs for containers not on this page; - // those are discarded during bucketing below (byContainer lookup). - const orClauses = [ - { command: { [Op.in]: ids.map(createCommand) } }, - { command: { [Op.in]: ids.map(reconfigureCommand) } }, - { command: { [Op.like]: `${RECONFIGURE_PREFIX}%` } }, - ]; - const jobs = await Job.findAll({ - where: { [Op.or]: orClauses }, - order: [['createdAt', 'DESC']], - }); - - // Bucket newest-first; the first job seen for a (container, kind) is the latest. - for (const job of jobs) { - const id = containerIdFromCommand(job.command); - if (id == null) continue; - const entry = byContainer.get(id); - if (!entry) continue; - if (isCreateCommand(job.command)) { - if (!entry.createJob) entry.createJob = job; - } else if (!entry.reconfigureJob) { - entry.reconfigureJob = job; - } - } - return byContainer; + if (container.creationJobId) return Job.findByPk(container.creationJobId); + return null; } function isActiveJob(job) { return !!job && ACTIVE_JOB_STATUSES.includes(job.status); } -/** - * Parse a NUL-separated Proxmox env string ("K=V\0K2=V2") into an object. - * @param {string|null|undefined} envStr - * @returns {Object} - */ -function parseEnvString(envStr) { - const out = {}; - if (!envStr) return out; - for (const pair of String(envStr).split('\0')) { - if (!pair) continue; - const eq = pair.indexOf('='); - if (eq > 0) out[pair.substring(0, eq)] = pair.substring(eq + 1); - } - return out; -} - -function shallowEqualMap(a, b) { - const ak = Object.keys(a); - const bk = Object.keys(b); - if (ak.length !== bk.length) return false; - for (const k of ak) { - if (a[k] !== b[k]) return false; - } - return true; -} - -/** - * Compare the env/entrypoint the API server expects against the live LXC config. - * - * The expectation is built the same way the reconfigure job builds it - * (`container.buildLxcEnvConfig()`), so this mirrors exactly what *would* be sent - * to Proxmox. Env is compared as an unordered set of key=value pairs because the - * Proxmox API does not preserve ordering. Returns true when the live config - * matches the expectation (i.e. in sync). - * - * @param {object} container - Container instance (provides buildLxcEnvConfig) - * @param {object} liveConfig - Result of ProxmoxApi.lxcConfig(node, vmid) - * @returns {boolean} true if in sync, false if drifted - */ -function configMatches(container, liveConfig) { - const expected = - typeof container.buildLxcEnvConfig === 'function' ? container.buildLxcEnvConfig() : {}; - const expectedDeletes = new Set( - (expected.delete ? String(expected.delete).split(',') : []).map((s) => s.trim()), - ); - - // --- entrypoint --- - const liveEntrypoint = liveConfig?.entrypoint || null; - if (expectedDeletes.has('entrypoint')) { - if (liveEntrypoint) return false; // expected absent, but present live - } else if ((expected.entrypoint || null) !== liveEntrypoint) { - return false; - } - - // --- env --- - const liveEnv = parseEnvString(liveConfig?.env); - if (expectedDeletes.has('env')) { - if (Object.keys(liveEnv).length > 0) return false; // expected none, but some live - } else { - const expectedEnv = parseEnvString(expected.env); - if (!shallowEqualMap(expectedEnv, liveEnv)) return false; - } - - return true; -} - /** * Locate a container in a cluster-resources snapshot by VMID. * @param {Array|null} snapshot - Result of ProxmoxApi.clusterResources('lxc') @@ -254,23 +83,17 @@ function findInSnapshot(snapshot, vmid) { * @param {boolean} facts.proxmoxReachable - Whether Proxmox was queried OK. * @param {boolean} facts.inProxmox - Whether the LXC exists in Proxmox. * @param {boolean} facts.proxmoxRunning - Whether the LXC is running. - * @param {boolean} facts.inSync - Whether the live config matches expectation. * @param {boolean} facts.hasVmid - Whether the container has a Proxmox VMID. * @param {boolean} facts.creating - Active create job present. - * @param {boolean} facts.restarting - Active reconfigure job present. - * @param {boolean} facts.createFailed - Latest create job returned failure. - * @param {boolean} facts.hasCreateJob - A create job exists at all. + * @param {boolean} facts.createFailed - Create job returned failure. * @returns {string} STATUS value */ function decideStatus(facts) { if (facts.inProxmox) { - if (facts.restarting) return STATUS.RESTARTING; - if (facts.inSync === false) return STATUS.OUT_OF_SYNC; return facts.proxmoxRunning ? STATUS.RUNNING : STATUS.OFFLINE; } if (facts.creating) return STATUS.CREATING; - if (facts.restarting) return STATUS.RESTARTING; // We had a VMID + creds but couldn't reach Proxmox: don't guess missing. if (facts.hasVmid && !facts.proxmoxReachable) return STATUS.UNKNOWN; @@ -283,30 +106,21 @@ function decideStatus(facts) { * Compute the live status of a single container. * * @param {object} params - * @param {object} params.container - Container instance (with `node` association). + * @param {object} params.container - Container instance (with `node` association, + * and ideally the `creationJob` association eager-loaded). * @param {object} params.Job - Job model (from models). * @param {object} [params.api] - Optional pre-authenticated ProxmoxApi client for * the container's node (lets callers reuse one client across many containers). * @param {{ data: Array, ok: boolean }} [params.snapshot] - Optional * pre-fetched clusterResources('lxc') snapshot for the node. `ok` indicates the * query succeeded; `data` is the resource list (used for batching the index). - * @param {{ createJob: object|null, reconfigureJob: object|null }} [params.jobs] - - * Optional prefetched latest create/reconfigure jobs for this container (see - * prefetchLatestJobs). When provided, the per-container job DB lookups are - * skipped, making batch resolution O(1) queries instead of O(N). * @returns {Promise} One of the STATUS values. */ -async function computeContainerStatus({ container, Job, api, snapshot, jobs }) { +async function computeContainerStatus({ container, Job, api, snapshot }) { const node = container.node; const hasCreds = nodeHasCreds(node); const hasVmid = container.containerId != null; - let client = api || null; - async function getClient() { - if (!client) client = await node.api(); - return client; - } - // --- Determine Proxmox presence / run state --- let proxmoxReachable = false; let inProxmox = false; @@ -319,8 +133,9 @@ async function computeContainerStatus({ container, Job, api, snapshot, jobs }) { proxmoxReachable = !!snapshot.ok; resources = snapshot.ok ? snapshot.data : null; } else { - const c = await getClient(); - resources = await c.clusterResources('lxc'); + // Single-container path (no shared snapshot): fetch this node's once. + const client = api || (await node.api()); + resources = await client.clusterResources('lxc'); proxmoxReachable = true; } if (proxmoxReachable) { @@ -335,33 +150,13 @@ async function computeContainerStatus({ container, Job, api, snapshot, jobs }) { } } - // --- Jobs --- - // Use prefetched jobs when supplied (batch path); otherwise query per container. - const reconfigureJob = jobs - ? jobs.reconfigureJob - : await findLatestReconfigureJob(container, Job); - const restarting = isActiveJob(reconfigureJob); - - // Drift detection (only meaningful if it exists and isn't already restarting). - let inSync = null; - if (inProxmox && !restarting && hasCreds) { - try { - const c = await getClient(); - const liveConfig = await c.lxcConfig(node.name, container.containerId); - inSync = configMatches(container, liveConfig); - } catch (err) { - inSync = null; // can't assert drift - } - } - - // Only look up the create job when we actually need it (container not running - // in Proxmox), to avoid an extra query on the happy path. + // Only inspect the create job when the container isn't in Proxmox, to + // distinguish creating / failed / missing. The create job is linked by FK + // (creationJobId), so this is a cheap PK lookup or free when eager-loaded. let creating = false; let createFailed = false; - let hasCreateJob = false; if (!inProxmox) { - const createJob = jobs ? jobs.createJob : await findLatestCreateJob(container, Job); - hasCreateJob = !!createJob; + const createJob = await findCreateJob(container, Job); creating = isActiveJob(createJob); createFailed = !!createJob && createJob.status === 'failure'; } @@ -370,35 +165,29 @@ async function computeContainerStatus({ container, Job, api, snapshot, jobs }) { proxmoxReachable, inProxmox, proxmoxRunning, - inSync, hasVmid, creating, - restarting, createFailed, - hasCreateJob, }); } /** * Compute live statuses for many containers efficiently. * - * Two batching strategies keep this fast for the list page: - * - Jobs: a single DB query fetches the latest create/reconfigure job for every - * container up front (O(1) queries instead of O(N)). + * No per-container Proxmox calls and no per-container DB queries are issued: * - Proxmox: containers are grouped by node so each node's - * `clusterResources('lxc')` snapshot is fetched exactly once and a single - * authenticated client is reused for that node's containers. + * `clusterResources('lxc')` snapshot is fetched exactly once. + * - Create job: resolved from each container's eager-loaded `creationJob` + * association (or its creationJobId FK). * - * @param {Array} containers - Container instances (each with `node`). + * @param {Array} containers - Container instances (each with `node`, and + * ideally the `creationJob` association loaded to avoid per-container queries). * @param {object} Job - Job model. * @returns {Promise>} Map of container.id -> STATUS value. */ async function computeContainerStatuses(containers, Job) { const result = new Map(); - // One query for everyone's latest create/reconfigure jobs. - const jobsByContainer = await prefetchLatestJobs(containers, Job); - // Group by node id (containers without a node still get resolved, just without // any Proxmox facts). const byNode = new Map(); @@ -411,12 +200,11 @@ async function computeContainerStatuses(containers, Job) { await Promise.all( Array.from(byNode.values()).map(async (group) => { const node = group[0].node; - let api = null; let snapshot = { ok: false, data: null }; if (nodeHasCreds(node)) { try { - api = await node.api(); + const api = await node.api(); const data = await api.clusterResources('lxc'); snapshot = { ok: true, data }; } catch (err) { @@ -424,15 +212,11 @@ async function computeContainerStatuses(containers, Job) { } } - // Resolve each container in the group sequentially per node (shares the - // single authenticated client); different nodes run in parallel. + // Resolution is now CPU-only per container (no Proxmox/DB I/O when the + // create job is eager-loaded), so the snapshot is reused for all of them. for (const container of group) { - const jobs = jobsByContainer.get(container.id) || { - createJob: null, - reconfigureJob: null, - }; // eslint-disable-next-line no-await-in-loop - const status = await computeContainerStatus({ container, Job, api, snapshot, jobs }); + const status = await computeContainerStatus({ container, Job, snapshot }); result.set(container.id, status); } }), @@ -448,11 +232,6 @@ module.exports = { computeContainerStatuses, decideStatus, // exported for reuse / testing - configMatches, - parseEnvString, findInSnapshot, - findLatestCreateJob, - findLatestReconfigureJob, - prefetchLatestJobs, - containerIdFromCommand, + findCreateJob, };