diff --git a/create-a-container/README.md b/create-a-container/README.md index 4c135ef0..a9ba5832 100644 --- a/create-a-container/README.md +++ b/create-a-container/README.md @@ -22,7 +22,6 @@ erDiagram int id PK string hostname UK "FQDN hostname" string username "Owner username" - string status "pending,creating,running,failed" string template "Template name" int creationJobId FK "References Job" int nodeId FK "References Node" @@ -231,6 +230,22 @@ Delete a container from both Proxmox and the database - `403` - User doesn't own the container - `500` - Proxmox API deletion failed or node not configured +#### Container status (`status` field) +Every container returned by the list, show, and create endpoints includes a +**live** `status` field, computed on demand rather than read from a stored +column. It is resolved by combining the container's run-state in Proxmox (from a +single per-node cluster snapshot) with the state of its create job. Possible +values: +- `running` — online in Proxmox +- `offline` — exists in Proxmox but stopped +- `creating` — no Proxmox VM yet, active create job +- `failed` — no Proxmox VM, create job failed +- `missing` — no Proxmox VM, create succeeded or no create job found +- `unknown` — Proxmox unreachable / node has no API credentials + +The create endpoint (`POST /containers`) returns `creating` immediately, since a +create job is enqueued and there is no Proxmox VM yet. + #### `GET /status/:jobId` (Auth Required) View container creation progress page diff --git a/create-a-container/bin/create-container.js b/create-a-container/bin/create-container.js index 68694e25..af221df9 100755 --- a/create-a-container/bin/create-container.js +++ b/create-a-container/bin/create-container.js @@ -177,9 +177,14 @@ async function main() { console.error(`Container with ID ${containerId} not found`); process.exit(1); } - - if (container.status !== 'pending') { - console.error(`Container is not in pending status (current: ${container.status})`); + + // Guard against double-provisioning. The status column no longer exists, so + // we use the Proxmox VMID as the signal: once a VMID has been allocated and + // stored, creation has already run for this container. + if (container.containerId) { + console.error( + `Container already has a Proxmox VMID (${container.containerId}); refusing to re-create`, + ); process.exit(1); } @@ -217,10 +222,6 @@ async function main() { console.log(`Template type: ${isDocker ? 'Docker image' : 'Proxmox template'}`); try { - // Update status to 'creating' - await container.update({ status: 'creating' }); - console.log('Status updated to: creating'); - // Get the Proxmox API client const client = await node.api(); console.log('Proxmox API client initialized'); @@ -428,8 +429,7 @@ async function main() { console.log('Updating container record...'); await container.update({ macAddress, - ipv4Address, - status: 'running' + ipv4Address }); console.log('Container creation completed successfully!'); @@ -480,15 +480,7 @@ async function main() { if (err.response?.data) { console.error('API Error Details:', JSON.stringify(err.response.data, null, 2)); } - - // Update status to failed - try { - await container.update({ status: 'failed' }); - console.log('Status updated to: failed'); - } catch (updateErr) { - console.error('Failed to update container status:', updateErr.message); - } - + process.exit(1); } } diff --git a/create-a-container/bin/json-to-sql.js b/create-a-container/bin/json-to-sql.js index 94c02243..f55af4d5 100644 --- a/create-a-container/bin/json-to-sql.js +++ b/create-a-container/bin/json-to-sql.js @@ -222,7 +222,6 @@ async function run() { hostname, ipv4Address: obj.ip, username: obj.user || '', - status: 'running', template: obj.template || null, containerId: obj.ctid, macAddress: obj.mac, @@ -234,7 +233,6 @@ async function run() { await container.update({ ipv4Address: obj.ip, username: obj.user || '', - status: container.status || 'running', template: obj.template || container.template, containerId: obj.ctid, macAddress: obj.mac diff --git a/create-a-container/bin/reconfigure-container.js b/create-a-container/bin/reconfigure-container.js index ee5ba86e..1f702ce9 100644 --- a/create-a-container/bin/reconfigure-container.js +++ b/create-a-container/bin/reconfigure-container.js @@ -160,14 +160,13 @@ async function main() { throw new Error('Could not get IP address from Proxmox interfaces API'); } - // Update container record with MAC/IP and running status + // Update container record with MAC/IP await container.update({ - status: 'running', macAddress, ipv4Address }); - console.log('Status updated to: running'); + console.log('Reconfiguration applied; container running'); console.log(` MAC: ${macAddress}`); console.log(` IP: ${ipv4Address}`); } @@ -181,15 +180,7 @@ async function main() { if (err.response?.data) { console.error('API Error Details:', JSON.stringify(err.response.data, null, 2)); } - - // Update status to failed - try { - await container.update({ status: 'failed' }); - console.log('Status updated to: failed'); - } catch (updateErr) { - console.error('Failed to update container status:', updateErr.message); - } - + process.exit(1); } } diff --git a/create-a-container/client/src/lib/types.ts b/create-a-container/client/src/lib/types.ts index 85701ab3..809db607 100644 --- a/create-a-container/client/src/lib/types.ts +++ b/create-a-container/client/src/lib/types.ts @@ -75,7 +75,7 @@ export interface Container { hostname: string; ipv4Address: string | null; macAddress: string | null; - status: string; + status: ContainerStatus; template: string | null; creationJobId: number | null; entrypoint: string | null; @@ -90,11 +90,23 @@ export interface Container { createdAt: string; } +/** + * Live container status resolved from Proxmox run-state + create-job state. + * Embedded on each Container returned by the list/show/create endpoints. + */ +export type ContainerStatus = + | 'running' + | 'offline' + | 'creating' + | 'failed' + | 'missing' + | 'unknown'; + export interface ContainerCreateResult { containerId: number; jobId: number; hostname: string; - status: string; + status: ContainerStatus; } export interface ContainerNewBootstrap { diff --git a/create-a-container/client/src/pages/containers/ContainersListPage.tsx b/create-a-container/client/src/pages/containers/ContainersListPage.tsx index ec5a195f..5539ebf6 100644 --- a/create-a-container/client/src/pages/containers/ContainersListPage.tsx +++ b/create-a-container/client/src/pages/containers/ContainersListPage.tsx @@ -34,28 +34,44 @@ import { import { api, ApiError } from '@/lib/api'; import { useSession } from '@/lib/auth'; import { keys, queries } from '@/lib/queries'; -import type { Container } from '@/lib/types'; +import type { Container, ContainerStatus } from '@/lib/types'; type ViewMode = 'cards' | 'table'; const VIEW_STORAGE_KEY = 'containers:view'; -function statusVariant(s: string): 'default' | 'success' | 'warning' | 'danger' | 'secondary' { +function statusVariant( + s: ContainerStatus, +): 'default' | 'success' | 'warning' | 'danger' | 'secondary' { switch (s) { case 'running': return 'success'; - case 'pending': - case 'restarting': + case 'creating': return 'warning'; case 'failed': - case 'error': return 'danger'; - case 'stopped': + case 'offline': + case 'missing': return 'secondary'; default: return 'default'; } } +// Human-readable labels for the live status values. +const STATUS_LABELS: Record = { + running: 'Running', + offline: 'Offline', + creating: 'Creating', + failed: 'Failed', + missing: 'Missing', + unknown: 'Unknown', +}; + +/** Status badge. The status is the live value embedded in the list response. */ +function StatusBadge({ status }: { status: ContainerStatus }) { + return {STATUS_LABELS[status] ?? status}; +} + const linkClass = 'text-(--color-primary,#1d4ed8) hover:underline'; /** Shorten a full image ref to just its name+tag, e.g. ghcr.io/mieweb/base:latest -> base:latest */ @@ -330,7 +346,7 @@ export function ContainersListPage() { {c.hostname} - {c.status} +
@@ -374,7 +390,7 @@ export function ContainersListPage() { {c.hostname} - {c.status} + diff --git a/create-a-container/migrations/20260615000000-remove-container-status.js b/create-a-container/migrations/20260615000000-remove-container-status.js new file mode 100644 index 00000000..39d7b50e --- /dev/null +++ b/create-a-container/migrations/20260615000000-remove-container-status.js @@ -0,0 +1,26 @@ +'use strict'; +/** @type {import('sequelize-cli').Migration} */ + +/** + * Remove the static `status` column from Containers. + * + * Container status is now computed live from Proxmox + job state + config drift + * (see utils/container-status.js) and embedded on the container API payloads, so + * the persisted column is no longer a source of truth and is dropped. + * + * The down migration restores the column (defaulting to 'running') for rollback, + * but the historical per-container value cannot be recovered. + */ +module.exports = { + async up(queryInterface) { + await queryInterface.removeColumn('Containers', 'status'); + }, + + async down(queryInterface, Sequelize) { + await queryInterface.addColumn('Containers', 'status', { + type: Sequelize.STRING(20), + allowNull: false, + defaultValue: 'running', + }); + }, +}; diff --git a/create-a-container/models/container.js b/create-a-container/models/container.js index 721cebfe..b6bfc83e 100644 --- a/create-a-container/models/container.js +++ b/create-a-container/models/container.js @@ -268,11 +268,6 @@ module.exports = (sequelize, DataTypes) => { type: DataTypes.STRING(255), allowNull: false }, - status: { - type: DataTypes.STRING(20), - allowNull: false, - defaultValue: 'pending' - }, template: { type: DataTypes.STRING(255), allowNull: true diff --git a/create-a-container/openapi.v1.yaml b/create-a-container/openapi.v1.yaml index e049dd16..70e137e1 100644 --- a/create-a-container/openapi.v1.yaml +++ b/create-a-container/openapi.v1.yaml @@ -89,7 +89,7 @@ components: id: { type: integer } hostname: { type: string } containerId: { type: integer, nullable: true } - status: { type: string } + status: { $ref: '#/components/schemas/ContainerStatus' } template: { type: string } ipv4Address: { type: string, nullable: true } macAddress: { type: string, nullable: true } @@ -104,6 +104,24 @@ components: services: type: array items: { type: object } + ContainerStatus: + type: string + description: >- + Live container status, resolved on read from Proxmox run-state and the + create job. + running = online in Proxmox; + offline = exists in Proxmox but stopped; + creating = no Proxmox VM yet, active create job; + failed = no Proxmox VM, create job failed; + missing = no Proxmox VM, create succeeded or no create job; + unknown = Proxmox unreachable / no node credentials. + enum: + - running + - offline + - creating + - failed + - missing + - unknown Node: type: object properties: diff --git a/create-a-container/routers/api/v1/containers.js b/create-a-container/routers/api/v1/containers.js index 70c2947d..8f6fbfe4 100644 --- a/create-a-container/routers/api/v1/containers.js +++ b/create-a-container/routers/api/v1/containers.js @@ -21,6 +21,11 @@ const { const { parseDockerRef, getImageConfig, extractImageMetadata } = require('../../../utils/docker-registry'); const { manageDnsRecords } = require('../../../utils/cloudflare-dns'); const { deleteVirtualMachine, withNetbox } = require('../../../utils/netbox'); +const { + computeContainerStatus, + computeContainerStatuses, + STATUS, +} = require('../../../utils/container-status'); const { apiAuth, asyncHandler, ok, created, ApiError } = require('../../../middlewares/api'); const router = express.Router({ mergeParams: true }); @@ -86,7 +91,7 @@ async function loadSite(req) { return site; } -function serializeContainer(c, site) { +function serializeContainer(c, site, status) { const services = c.services || []; const ssh = services.find( (s) => @@ -110,7 +115,9 @@ function serializeContainer(c, site) { hostname: c.hostname, ipv4Address: c.ipv4Address, macAddress: c.macAddress, - status: c.status, + // Live status computed from Proxmox + jobs + config (see utils/container-status). + // Kept on every container payload so existing consumers remain non-breaking. + status: status || STATUS.UNKNOWN, template: c.template, creationJobId: c.creationJobId, entrypoint: c.entrypoint, @@ -202,10 +209,19 @@ router.get( { association: 'dnsService' }, ], }, - { association: 'node', attributes: ['id', 'name', 'apiUrl'] }, + // Full node record (incl. credentials) is required to query live Proxmox status. + { association: 'node' }, + // Eager-load the create job so status resolution needs no per-container query. + { association: 'creationJob' }, ], }); - return ok(res, rows.map((c) => serializeContainer(c, site))); + // Resolve live statuses for the whole page in one pass: one Proxmox snapshot + // per node (shared), and no per-container DB queries (create job is loaded above). + const statuses = await computeContainerStatuses(rows, Job); + return ok( + res, + rows.map((c) => serializeContainer(c, site, statuses.get(c.id))), + ); }), ); @@ -233,12 +249,14 @@ router.get( ], }, { association: 'node' }, + { association: 'creationJob' }, ], }); if (!c || !c.node || c.node.siteId !== site.id) { throw new ApiError(404, 'not_found', 'Container not found'); } - return ok(res, serializeContainer(c, site)); + const status = await computeContainerStatus({ container: c, Job }); + return ok(res, serializeContainer(c, site, status)); }), ); @@ -296,7 +314,6 @@ router.post( { hostname, username: req.session.user, - status: 'pending', template: templateName, nodeId: node.id, siteId: site.id, @@ -371,7 +388,9 @@ router.post( containerId: container.id, jobId: job.id, hostname: container.hostname, - status: 'pending', + // A create job was just enqueued and there is no Proxmox VMID yet, so the + // live status resolver would report 'creating' — return it directly. + status: STATUS.CREATING, }); } catch (err) { await t.rollback(); @@ -418,12 +437,9 @@ router.put( { environmentVars: envVarsJson, entrypoint: newEntrypoint, - status: needsRestart && container.containerId ? 'restarting' : container.status, }, { transaction: t }, ); - } else if (forceRestart && container.containerId) { - await container.update({ status: 'restarting' }, { transaction: t }); } if (needsRestart && container.containerId) { restartJob = await Job.create( diff --git a/create-a-container/routers/api/v1/nodes.js b/create-a-container/routers/api/v1/nodes.js index d01e097d..2cbb3303 100644 --- a/create-a-container/routers/api/v1/nodes.js +++ b/create-a-container/routers/api/v1/nodes.js @@ -247,7 +247,6 @@ router.post( containerId: c.vmid, macAddress, ipv4Address, - status: 'running', }; }), ); diff --git a/create-a-container/routers/api/v1/resource-requests.js b/create-a-container/routers/api/v1/resource-requests.js index ce047223..5997faef 100644 --- a/create-a-container/routers/api/v1/resource-requests.js +++ b/create-a-container/routers/api/v1/resource-requests.js @@ -217,12 +217,15 @@ router.put( ); /** - * Apply a resource change to existing running containers matching the identity. + * Apply a resource change to existing provisioned containers matching the identity. * This creates a reconfigure job for each matching container. */ async function applyResourceToExistingContainers(siteId, hostname, username, resourceType, value) { const containers = await Container.findAll({ - where: { siteId, hostname, username, status: 'running' }, + // Only containers that exist in Proxmox (have a VMID) can be reconfigured. + // The status column was removed; reconfigure-container.js exits early without + // a containerId, so this is the correct provisioned predicate. + where: { siteId, hostname, username, containerId: { [Op.ne]: null } }, }); if (containers.length === 0) return; diff --git a/create-a-container/routers/templates.js b/create-a-container/routers/templates.js index 4793ba3a..938ce756 100644 --- a/create-a-container/routers/templates.js +++ b/create-a-container/routers/templates.js @@ -1,4 +1,5 @@ const express = require('express'); +const { Op } = require('sequelize'); const { Site, Node, Container, Service, HTTPService, TransportService, ExternalDomain } = require('../models'); const { requireLocalhostOrAdmin } = require('../middlewares'); @@ -12,7 +13,7 @@ async function loadDnsmasqSite(siteId) { include: [{ model: Container, as: 'containers', - where: { status: 'running' }, + where: { ipv4Address: { [Op.ne]: null } }, required: false, attributes: ['macAddress', 'ipv4Address', 'hostname'], }], @@ -40,7 +41,7 @@ router.get('/sites/:siteId/nginx', requireLocalhostOrAdmin, async (req, res) => include: [{ model: Container, as: 'containers', - where: { status: 'running' }, + where: { ipv4Address: { [Op.ne]: null } }, required: false, include: [{ model: Service, diff --git a/create-a-container/utils/container-status.js b/create-a-container/utils/container-status.js new file mode 100644 index 00000000..0c94c9d5 --- /dev/null +++ b/create-a-container/utils/container-status.js @@ -0,0 +1,237 @@ +/** + * container-status.js — resolve the *live* status of a container. + * + * Historically the container "status" was a static column in the database that + * was only mutated by the create/reconfigure job scripts. That value drifts out + * of reality whenever something changes a container directly in Proxmox (or when + * a job dies without updating the row). This module computes the real status on + * demand by combining two sources of truth: + * + * 1. Proxmox — does the LXC exist, and is it running or stopped? This comes + * from a single per-node `clusterResources('lxc')` snapshot. + * 2. Create job — for containers not yet in Proxmox: is the create job still + * active, did it fail, or is it gone? The create job is linked + * to the container by the `creationJobId` foreign key, so this + * is a cheap primary-key lookup (or free when eager-loaded). + * + * Resolved statuses: + * running — exists in Proxmox and is online + * offline — exists in Proxmox but is stopped + * creating — not in Proxmox, but has an active (pending/running) create job + * failed — not in Proxmox, create job returned failure + * missing — not in Proxmox, create job succeeded or no create job found + * unknown — Proxmox could not be reached / node has no API credentials + * + * Note: there is intentionally no per-container Proxmox config read here. Proxmox + * has no bulk config endpoint, so config-drift detection would be O(N) network + * round-trips for the list page; it was dropped in favour of keeping status + * resolution cheap (one Proxmox call per node, zero per container). + */ + +const STATUS = Object.freeze({ + RUNNING: 'running', + OFFLINE: 'offline', + CREATING: 'creating', + FAILED: 'failed', + MISSING: 'missing', + UNKNOWN: 'unknown', +}); + +const STATUS_VALUES = Object.freeze(Object.values(STATUS)); + +const ACTIVE_JOB_STATUSES = ['pending', 'running']; + +function nodeHasCreds(node) { + return !!(node && node.apiUrl && node.tokenId && node.secret); +} + +/** + * Resolve the create job for a container via its `creationJobId` foreign key. + * Prefers an already-loaded `creationJob` association (zero queries); otherwise + * does a single primary-key lookup. Returns null if the container has no linked + * create job. + * @param {object} container - Container instance (ideally with creationJob assoc) + * @param {object} Job - Job model + * @returns {Promise} + */ +async function findCreateJob(container, Job) { + if (container.creationJob) return container.creationJob; + if (container.creationJobId) return Job.findByPk(container.creationJobId); + return null; +} + +function isActiveJob(job) { + return !!job && ACTIVE_JOB_STATUSES.includes(job.status); +} + +/** + * Locate a container in a cluster-resources snapshot by VMID. + * @param {Array|null} snapshot - Result of ProxmoxApi.clusterResources('lxc') + * @param {number} vmid + * @returns {object|null} The matching resource entry or null + */ +function findInSnapshot(snapshot, vmid) { + if (!Array.isArray(snapshot) || vmid == null) return null; + return snapshot.find((r) => Number(r.vmid) === Number(vmid)) || null; +} + +/** + * Resolve status from already-gathered facts. Pure function — no I/O — so it is + * trivial to unit test and is the single source of the decision tree. + * + * @param {object} facts + * @param {boolean} facts.proxmoxReachable - Whether Proxmox was queried OK. + * @param {boolean} facts.inProxmox - Whether the LXC exists in Proxmox. + * @param {boolean} facts.proxmoxRunning - Whether the LXC is running. + * @param {boolean} facts.hasVmid - Whether the container has a Proxmox VMID. + * @param {boolean} facts.creating - Active create job present. + * @param {boolean} facts.createFailed - Create job returned failure. + * @returns {string} STATUS value + */ +function decideStatus(facts) { + if (facts.inProxmox) { + return facts.proxmoxRunning ? STATUS.RUNNING : STATUS.OFFLINE; + } + + if (facts.creating) return STATUS.CREATING; + + // We had a VMID + creds but couldn't reach Proxmox: don't guess missing. + if (facts.hasVmid && !facts.proxmoxReachable) return STATUS.UNKNOWN; + + if (facts.createFailed) return STATUS.FAILED; + return STATUS.MISSING; +} + +/** + * Compute the live status of a single container. + * + * @param {object} params + * @param {object} params.container - Container instance (with `node` association, + * and ideally the `creationJob` association eager-loaded). + * @param {object} params.Job - Job model (from models). + * @param {object} [params.api] - Optional pre-authenticated ProxmoxApi client for + * the container's node (lets callers reuse one client across many containers). + * @param {{ data: Array, ok: boolean }} [params.snapshot] - Optional + * pre-fetched clusterResources('lxc') snapshot for the node. `ok` indicates the + * query succeeded; `data` is the resource list (used for batching the index). + * @returns {Promise} One of the STATUS values. + */ +async function computeContainerStatus({ container, Job, api, snapshot }) { + const node = container.node; + const hasCreds = nodeHasCreds(node); + const hasVmid = container.containerId != null; + + // --- Determine Proxmox presence / run state --- + let proxmoxReachable = false; + let inProxmox = false; + let proxmoxRunning = false; + + if (hasVmid && hasCreds) { + try { + let resources; + if (snapshot) { + proxmoxReachable = !!snapshot.ok; + resources = snapshot.ok ? snapshot.data : null; + } else { + // Single-container path (no shared snapshot): fetch this node's once. + const client = api || (await node.api()); + resources = await client.clusterResources('lxc'); + proxmoxReachable = true; + } + if (proxmoxReachable) { + const resource = findInSnapshot(resources, container.containerId); + if (resource) { + inProxmox = true; + proxmoxRunning = resource.status === 'running'; + } + } + } catch (err) { + proxmoxReachable = false; + } + } + + // Only inspect the create job when the container isn't in Proxmox, to + // distinguish creating / failed / missing. The create job is linked by FK + // (creationJobId), so this is a cheap PK lookup or free when eager-loaded. + let creating = false; + let createFailed = false; + if (!inProxmox) { + const createJob = await findCreateJob(container, Job); + creating = isActiveJob(createJob); + createFailed = !!createJob && createJob.status === 'failure'; + } + + return decideStatus({ + proxmoxReachable, + inProxmox, + proxmoxRunning, + hasVmid, + creating, + createFailed, + }); +} + +/** + * Compute live statuses for many containers efficiently. + * + * No per-container Proxmox calls and no per-container DB queries are issued: + * - Proxmox: containers are grouped by node so each node's + * `clusterResources('lxc')` snapshot is fetched exactly once. + * - Create job: resolved from each container's eager-loaded `creationJob` + * association (or its creationJobId FK). + * + * @param {Array} containers - Container instances (each with `node`, and + * ideally the `creationJob` association loaded to avoid per-container queries). + * @param {object} Job - Job model. + * @returns {Promise>} Map of container.id -> STATUS value. + */ +async function computeContainerStatuses(containers, Job) { + const result = new Map(); + + // Group by node id (containers without a node still get resolved, just without + // any Proxmox facts). + const byNode = new Map(); + for (const container of containers) { + const key = container.node ? container.node.id : `__no_node_${container.id}`; + if (!byNode.has(key)) byNode.set(key, []); + byNode.get(key).push(container); + } + + await Promise.all( + Array.from(byNode.values()).map(async (group) => { + const node = group[0].node; + let snapshot = { ok: false, data: null }; + + if (nodeHasCreds(node)) { + try { + const api = await node.api(); + const data = await api.clusterResources('lxc'); + snapshot = { ok: true, data }; + } catch (err) { + snapshot = { ok: false, data: null }; + } + } + + // Resolution is now CPU-only per container (no Proxmox/DB I/O when the + // create job is eager-loaded), so the snapshot is reused for all of them. + for (const container of group) { + // eslint-disable-next-line no-await-in-loop + const status = await computeContainerStatus({ container, Job, snapshot }); + result.set(container.id, status); + } + }), + ); + + return result; +} + +module.exports = { + STATUS, + STATUS_VALUES, + computeContainerStatus, + computeContainerStatuses, + decideStatus, + // exported for reuse / testing + findInSnapshot, + findCreateJob, +};