From e7c9d8f8bb5d5cb4ab04906a7fcc6d4ff3a61183 Mon Sep 17 00:00:00 2001 From: Stackie Jia Date: Tue, 5 May 2026 16:05:28 +0800 Subject: [PATCH 1/5] fix(status): self-heal stale shm file and fail fast on init error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The shared memory file at /tmp/rtp2httpd_status_<pid> is keyed solely on the supervisor PID, so a previous instance that exited abnormally (SIGKILL, OOM, power loss) leaves a stale file behind. When PID reuse hits — common in containers with restart=always (supervisor is always PID 1), embedded systems with small pid_max, or any environment where /tmp is persistent — status_init() failed with EEXIST and the supervisor "continued anyway", forking workers that immediately segfaulted because most code paths dereference status_shared without NULL checks (zerocopy_init, buffer_pool, status_*, worker disconnect handling, etc.). Two fixes: 1. status_init() retries open(O_EXCL) once after unlinking on EEXIST. The path is bound to our own PID and no other live process in the same PID namespace can hold that PID, so the leftover is safe to remove. 2. main() now treats status_init() failure as fatal and exits, instead of producing a SIGSEGV restart loop that hits the supervisor's restart rate limiter. Co-Authored-By: Claude Opus 4.7 --- src/rtp2httpd.c | 10 +++++++--- src/status.c | 18 +++++++++++++++++- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/rtp2httpd.c b/src/rtp2httpd.c index 64cfbe3f..2ae61af3 100644 --- a/src/rtp2httpd.c +++ b/src/rtp2httpd.c @@ -11,10 +11,14 @@ int worker_id = SUPERVISOR_WORKER_ID; /* Worker ID for this process (0-based) */ int main(int argc, char *argv[]) { parse_cmd_line(argc, argv); - /* Initialize status tracking system (before fork, shared memory) */ + /* Initialize status tracking system (before fork, shared memory). 
+ * This is fatal: many code paths in workers dereference status_shared + * without NULL checks (zerocopy_init, buffer_pool, logger fan-out, etc.), + * so continuing here would just produce a SIGSEGV restart loop in the + * supervisor. */ if (status_init() != 0) { - logger(LOG_ERROR, "Failed to initialize status tracking"); - /* Continue anyway - status page won't work but streaming will */ + logger(LOG_FATAL, "Failed to initialize status tracking, exiting"); + return 1; } logger(LOG_INFO, "Starting rtp2httpd with %d worker(s)", config.workers); diff --git a/src/status.c b/src/status.c index 7618f2db..c3490eda 100644 --- a/src/status.c +++ b/src/status.c @@ -56,9 +56,25 @@ static char shm_path[256] = {0}; int status_init(void) { int fd; - /* Create shared memory file in /tmp */ + /* Create shared memory file in /tmp. + * The path is keyed on the supervisor PID, so a pre-existing file with the + * same name can only be a stale leftover from a previous instance that did + * not exit cleanly (SIGKILL, OOM, power loss) and happened to have the same + * PID. Within a single PID namespace no other live process can hold this + * PID, so the leftover is safe to unlink. This self-heal matters in + * environments where /tmp is persistent (some embedded/OpenWrt setups) or + * where PID reuse is highly likely (containers with restart=always always + * give the supervisor PID 1, small pid_max on embedded systems). 
*/ snprintf(shm_path, sizeof(shm_path), "/tmp/rtp2httpd_status_%d", getpid()); fd = open(shm_path, O_CREAT | O_RDWR | O_EXCL, 0600); + if (fd == -1 && errno == EEXIST) { + logger(LOG_WARN, "Stale shared memory file %s found, removing and retrying", shm_path); + if (unlink(shm_path) == -1 && errno != ENOENT) { + logger(LOG_ERROR, "Failed to unlink stale shared memory file: %s", strerror(errno)); + return -1; + } + fd = open(shm_path, O_CREAT | O_RDWR | O_EXCL, 0600); + } if (fd == -1) { logger(LOG_ERROR, "Failed to create shared memory file: %s", strerror(errno)); return -1; From c2f3bbb500ec9f0ab4a3fe13e3dfdaba90f295c1 Mon Sep 17 00:00:00 2001 From: Stackie Jia Date: Tue, 5 May 2026 16:11:14 +0800 Subject: [PATCH 2/5] refactor(status): trim comments per CLAUDE.md guidance Drop rot-prone references (specific call sites) and PR-context examples (OpenWrt, container scenarios). Keep only the non-obvious WHY: the PID-namespace invariant that justifies unlink-and-retry, and the reason status_init failure must be fatal. Co-Authored-By: Claude Opus 4.7 --- src/rtp2httpd.c | 7 ++----- src/status.c | 12 +++--------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/src/rtp2httpd.c b/src/rtp2httpd.c index 2ae61af3..a76d0f87 100644 --- a/src/rtp2httpd.c +++ b/src/rtp2httpd.c @@ -11,11 +11,8 @@ int worker_id = SUPERVISOR_WORKER_ID; /* Worker ID for this process (0-based) */ int main(int argc, char *argv[]) { parse_cmd_line(argc, argv); - /* Initialize status tracking system (before fork, shared memory). - * This is fatal: many code paths in workers dereference status_shared - * without NULL checks (zerocopy_init, buffer_pool, logger fan-out, etc.), - * so continuing here would just produce a SIGSEGV restart loop in the - * supervisor. */ + /* Fatal: workers dereference status_shared without NULL checks; continuing + * would just SIGSEGV-loop the supervisor. 
*/ if (status_init() != 0) { logger(LOG_FATAL, "Failed to initialize status tracking, exiting"); return 1; diff --git a/src/status.c b/src/status.c index c3490eda..711d6766 100644 --- a/src/status.c +++ b/src/status.c @@ -56,15 +56,9 @@ static char shm_path[256] = {0}; int status_init(void) { int fd; - /* Create shared memory file in /tmp. - * The path is keyed on the supervisor PID, so a pre-existing file with the - * same name can only be a stale leftover from a previous instance that did - * not exit cleanly (SIGKILL, OOM, power loss) and happened to have the same - * PID. Within a single PID namespace no other live process can hold this - * PID, so the leftover is safe to unlink. This self-heal matters in - * environments where /tmp is persistent (some embedded/OpenWrt setups) or - * where PID reuse is highly likely (containers with restart=always always - * give the supervisor PID 1, small pid_max on embedded systems). */ + /* PID-keyed path: EEXIST can only be a stale leftover from a prior instance + * with the same PID (no live process can hold our PID in this namespace), + * so unlink-and-retry is safe. */ snprintf(shm_path, sizeof(shm_path), "/tmp/rtp2httpd_status_%d", getpid()); fd = open(shm_path, O_CREAT | O_RDWR | O_EXCL, 0600); if (fd == -1 && errno == EEXIST) { From 8a0fcf4f93cdead754aac0f5bc68be00001180bf Mon Sep 17 00:00:00 2001 From: Stackie Jia Date: Tue, 5 May 2026 16:14:33 +0800 Subject: [PATCH 3/5] fix(status): keep status_shared NULL on init failure logger() in utils.c gates on status_shared being non-NULL, but mmap() sets it to MAP_FAILED ((void*)-1) on failure, and the pipe-creation failure path calls munmap() without clearing the pointer. Either case turned the subsequent error log into a SIGSEGV, defeating the fail-fast behavior added in the previous commit. - Stage mmap() into a local until success, then assign status_shared. - Reset status_shared to NULL after munmap() in the pipe-failure path. Reported by Copilot review on PR #464. 
Co-Authored-By: Claude Opus 4.7 --- src/status.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/status.c b/src/status.c index 711d6766..13e914d5 100644 --- a/src/status.c +++ b/src/status.c @@ -82,14 +82,20 @@ int status_init(void) { return -1; } - /* Map shared memory */ - status_shared = mmap(NULL, sizeof(status_shared_t), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (status_shared == MAP_FAILED) { - logger(LOG_ERROR, "Failed to map shared memory: %s", strerror(errno)); + /* Map shared memory. + * logger() probes status_shared with a NULL check, not a MAP_FAILED check, + * so we must reset to NULL before logging or any failure path that calls + * logger() will dereference (void*)-1. */ + void *mapped = mmap(NULL, sizeof(status_shared_t), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (mapped == MAP_FAILED) { + int err = errno; + status_shared = NULL; + logger(LOG_ERROR, "Failed to map shared memory: %s", strerror(err)); close(fd); unlink(shm_path); return -1; } + status_shared = mapped; /* Close file descriptor immediately after mmap() * Per POSIX: "closing the file descriptor does not unmap the region" @@ -124,6 +130,7 @@ int status_init(void) { close(status_shared->worker_notification_pipes[j]); } munmap(status_shared, sizeof(status_shared_t)); + status_shared = NULL; unlink(shm_path); return -1; } From e5ad6f616982848733c02cb1660ecf24ddde987d Mon Sep 17 00:00:00 2001 From: Stackie Jia Date: Tue, 5 May 2026 16:29:42 +0800 Subject: [PATCH 4/5] fix(status): flock-based liveness check; full init before publish MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues raised in PR #464 review: 1. Cross-namespace PID collision: the path /tmp/rtp2httpd_status_<pid> is only unique within a PID namespace. 
Two containers sharing /tmp can legitimately have the same PID, and the previous unlink-on-EEXIST recovery would silently destroy the sibling instance's directory entry, causing later cross-cleanup races. Replace the heuristic with an exclusive non-blocking flock(2): on EEXIST, open the existing file and try to lock it. Lock acquired ⇒ no live owner ⇒ stale, safe to unlink. Lock busy ⇒ refuse with a clear error rather than corrupting the sibling. The fresh fd retains the flock for the lifetime of the daemon and is inherited by workers (per-OFD lock semantics keep it held until the last process closes its copy), so future recovery attempts can detect us. 2. Pipe-creation failure path used logger() with status_shared non-NULL but log_mutex still uninitialized, so logger() → status_add_log_entry → pthread_mutex_lock on a zero mutex (UB; not portable). Stage all initialization on a local pointer and only assign status_shared after the struct is fully usable (mutexes init'd, pipes created). All failure paths munmap the local and return with status_shared still NULL, so logger() falls through its NULL guard. Reported by Copilot review on PR #464. Co-Authored-By: Claude Opus 4.7 --- src/status.c | 139 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 86 insertions(+), 53 deletions(-) diff --git a/src/status.c b/src/status.c index 13e914d5..ed0ceaaf 100644 --- a/src/status.c +++ b/src/status.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -53,20 +54,45 @@ status_shared_t *status_shared = NULL; /* Path for shared memory file in /tmp */ static char shm_path[256] = {0}; +/* Held for the lifetime of the daemon. The exclusive flock on this fd + * advertises liveness so a future recovery attempt with the same PID (e.g. + * after PID reuse) can distinguish a stale leftover from an actively running + * sibling instance (e.g. another container sharing /tmp). 
Workers inherit + * this fd via fork(); the lock is per open file description so the + * supervisor's copy keeps it held even after workers close theirs. */ +static int shm_lock_fd = -1; + int status_init(void) { - int fd; + status_shared_t *shared = NULL; + int fd = -1; + int err; - /* PID-keyed path: EEXIST can only be a stale leftover from a prior instance - * with the same PID (no live process can hold our PID in this namespace), - * so unlink-and-retry is safe. */ snprintf(shm_path, sizeof(shm_path), "/tmp/rtp2httpd_status_%d", getpid()); + fd = open(shm_path, O_CREAT | O_RDWR | O_EXCL, 0600); if (fd == -1 && errno == EEXIST) { - logger(LOG_WARN, "Stale shared memory file %s found, removing and retrying", shm_path); + /* The path is keyed only on PID, which is not unique across PID + * namespaces sharing /tmp. Use a non-blocking exclusive flock to tell + * stale-from-crash apart from actively-held-by-sibling: if we can lock + * it, no live process is using the file and unlink-and-recreate is safe; + * if not, refuse rather than corrupting the sibling's state. 
*/ + int existing = open(shm_path, O_RDWR); + if (existing == -1) { + logger(LOG_ERROR, "Failed to open existing shared memory file %s: %s", shm_path, strerror(errno)); + return -1; + } + if (flock(existing, LOCK_EX | LOCK_NB) == -1) { + err = errno; + close(existing); + logger(LOG_ERROR, "Shared memory file %s is held by another live instance: %s", shm_path, strerror(err)); + return -1; + } + close(existing); /* releases this transient lock; the file is about to go */ if (unlink(shm_path) == -1 && errno != ENOENT) { logger(LOG_ERROR, "Failed to unlink stale shared memory file: %s", strerror(errno)); return -1; } + logger(LOG_WARN, "Removed stale shared memory file %s", shm_path); fd = open(shm_path, O_CREAT | O_RDWR | O_EXCL, 0600); } if (fd == -1) { @@ -74,46 +100,51 @@ int status_init(void) { return -1; } - /* Set size of shared memory */ - if (ftruncate(fd, sizeof(status_shared_t)) == -1) { - logger(LOG_ERROR, "Failed to set shared memory size: %s", strerror(errno)); + if (flock(fd, LOCK_EX | LOCK_NB) == -1) { + err = errno; close(fd); unlink(shm_path); + logger(LOG_ERROR, "Failed to lock shared memory file: %s", strerror(err)); return -1; } - /* Map shared memory. - * logger() probes status_shared with a NULL check, not a MAP_FAILED check, - * so we must reset to NULL before logging or any failure path that calls - * logger() will dereference (void*)-1. 
*/ - void *mapped = mmap(NULL, sizeof(status_shared_t), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (mapped == MAP_FAILED) { - int err = errno; - status_shared = NULL; - logger(LOG_ERROR, "Failed to map shared memory: %s", strerror(err)); + if (ftruncate(fd, sizeof(status_shared_t)) == -1) { + err = errno; close(fd); unlink(shm_path); + logger(LOG_ERROR, "Failed to set shared memory size: %s", strerror(err)); return -1; } - status_shared = mapped; - - /* Close file descriptor immediately after mmap() - * Per POSIX: "closing the file descriptor does not unmap the region" - * This is best practice and avoids fd management issues after fork() */ - close(fd); - /* Initialize shared memory structure */ - memset(status_shared, 0, sizeof(status_shared_t)); - status_shared->server_start_time = get_realtime_ms(); - status_shared->current_log_level = config.verbosity; - status_shared->event_counter = 0; + /* Stage everything on a local pointer; only publish to status_shared after + * the struct is fully usable. This keeps logger() (which gates on + * status_shared != NULL and may take log_mutex) from observing a + * partially-initialized region from any failure path. 
*/ + shared = mmap(NULL, sizeof(status_shared_t), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (shared == MAP_FAILED) { + err = errno; + close(fd); + unlink(shm_path); + logger(LOG_ERROR, "Failed to map shared memory: %s", strerror(err)); + return -1; + } - /* Initialize pipe fds to -1 (invalid) */ + memset(shared, 0, sizeof(status_shared_t)); + shared->server_start_time = get_realtime_ms(); + shared->current_log_level = config.verbosity; + shared->event_counter = 0; for (int i = 0; i < STATUS_MAX_WORKERS; i++) { - status_shared->worker_notification_pipe_read_fds[i] = -1; - status_shared->worker_notification_pipes[i] = -1; + shared->worker_notification_pipe_read_fds[i] = -1; + shared->worker_notification_pipes[i] = -1; } + pthread_mutexattr_t mutex_attr; + pthread_mutexattr_init(&mutex_attr); + pthread_mutexattr_setpshared(&mutex_attr, PTHREAD_PROCESS_SHARED); + pthread_mutex_init(&shared->log_mutex, &mutex_attr); + pthread_mutex_init(&shared->clients_mutex, &mutex_attr); + pthread_mutexattr_destroy(&mutex_attr); + /* Pre-create notification pipes for all possible workers (STATUS_MAX_WORKERS) * This is done BEFORE fork so all processes inherit the same pipe fds. 
* Pre-creating all pipes allows future config reload to change worker count @@ -121,39 +152,33 @@ int status_init(void) { for (int i = 0; i < STATUS_MAX_WORKERS; i++) { int pipe_fds[2]; if (pipe(pipe_fds) == -1) { - logger(LOG_ERROR, "Failed to create notification pipe for worker %d: %s", i, strerror(errno)); - /* Clean up already created pipes */ + err = errno; for (int j = 0; j < i; j++) { - if (status_shared->worker_notification_pipe_read_fds[j] != -1) - close(status_shared->worker_notification_pipe_read_fds[j]); - if (status_shared->worker_notification_pipes[j] != -1) - close(status_shared->worker_notification_pipes[j]); + if (shared->worker_notification_pipe_read_fds[j] != -1) + close(shared->worker_notification_pipe_read_fds[j]); + if (shared->worker_notification_pipes[j] != -1) + close(shared->worker_notification_pipes[j]); } - munmap(status_shared, sizeof(status_shared_t)); - status_shared = NULL; + pthread_mutex_destroy(&shared->log_mutex); + pthread_mutex_destroy(&shared->clients_mutex); + munmap(shared, sizeof(status_shared_t)); + close(fd); unlink(shm_path); + logger(LOG_ERROR, "Failed to create notification pipe for worker %d: %s", i, strerror(err)); return -1; } - /* Set read end to non-blocking mode */ int flags = fcntl(pipe_fds[0], F_GETFL, 0); fcntl(pipe_fds[0], F_SETFL, flags | O_NONBLOCK); - /* Store both ends in shared memory - * Read ends will be used by each worker after fork - * Write ends are accessible by all workers for cross-worker notification - */ - status_shared->worker_notification_pipe_read_fds[i] = pipe_fds[0]; - status_shared->worker_notification_pipes[i] = pipe_fds[1]; + shared->worker_notification_pipe_read_fds[i] = pipe_fds[0]; + shared->worker_notification_pipes[i] = pipe_fds[1]; } - /* Initialize mutexes for multi-process safety */ - pthread_mutexattr_t mutex_attr; - pthread_mutexattr_init(&mutex_attr); - pthread_mutexattr_setpshared(&mutex_attr, PTHREAD_PROCESS_SHARED); - pthread_mutex_init(&status_shared->log_mutex, 
&mutex_attr); - pthread_mutex_init(&status_shared->clients_mutex, &mutex_attr); - pthread_mutexattr_destroy(&mutex_attr); + /* Publish. Keep fd open: it carries the liveness flock and will be closed + * by status_cleanup(). */ + shm_lock_fd = fd; + status_shared = shared; logger(LOG_INFO, "Status tracking initialized"); return 0; @@ -212,6 +237,14 @@ void status_cleanup(void) { status_shared = NULL; } + /* Close this process's copy of the liveness lock fd. The flock is per + * open-file-description, so the lock survives until the last process + * closes its inherited copy. */ + if (shm_lock_fd != -1) { + close(shm_lock_fd); + shm_lock_fd = -1; + } + /* Only the final cleanup process unlinks shared memory file * unlink() removes the shared memory file from the filesystem */ if (is_final_cleanup) { From 93a728fe789c02cf3eabe233bf36d1e9370c8b32 Mon Sep 17 00:00:00 2001 From: Stackie Jia Date: Tue, 5 May 2026 16:32:55 +0800 Subject: [PATCH 5/5] Revert "fix(status): flock-based liveness check; full init before publish" This reverts commit e5ad6f616982848733c02cb1660ecf24ddde987d. --- src/status.c | 139 ++++++++++++++++++++------------------------------- 1 file changed, 53 insertions(+), 86 deletions(-) diff --git a/src/status.c b/src/status.c index ed0ceaaf..13e914d5 100644 --- a/src/status.c +++ b/src/status.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include @@ -54,45 +53,20 @@ status_shared_t *status_shared = NULL; /* Path for shared memory file in /tmp */ static char shm_path[256] = {0}; -/* Held for the lifetime of the daemon. The exclusive flock on this fd - * advertises liveness so a future recovery attempt with the same PID (e.g. - * after PID reuse) can distinguish a stale leftover from an actively running - * sibling instance (e.g. another container sharing /tmp). Workers inherit - * this fd via fork(); the lock is per open file description so the - * supervisor's copy keeps it held even after workers close theirs. 
*/ -static int shm_lock_fd = -1; - int status_init(void) { - status_shared_t *shared = NULL; - int fd = -1; - int err; + int fd; + /* PID-keyed path: EEXIST can only be a stale leftover from a prior instance + * with the same PID (no live process can hold our PID in this namespace), + * so unlink-and-retry is safe. */ snprintf(shm_path, sizeof(shm_path), "/tmp/rtp2httpd_status_%d", getpid()); - fd = open(shm_path, O_CREAT | O_RDWR | O_EXCL, 0600); if (fd == -1 && errno == EEXIST) { - /* The path is keyed only on PID, which is not unique across PID - * namespaces sharing /tmp. Use a non-blocking exclusive flock to tell - * stale-from-crash apart from actively-held-by-sibling: if we can lock - * it, no live process is using the file and unlink-and-recreate is safe; - * if not, refuse rather than corrupting the sibling's state. */ - int existing = open(shm_path, O_RDWR); - if (existing == -1) { - logger(LOG_ERROR, "Failed to open existing shared memory file %s: %s", shm_path, strerror(errno)); - return -1; - } - if (flock(existing, LOCK_EX | LOCK_NB) == -1) { - err = errno; - close(existing); - logger(LOG_ERROR, "Shared memory file %s is held by another live instance: %s", shm_path, strerror(err)); - return -1; - } - close(existing); /* releases this transient lock; the file is about to go */ + logger(LOG_WARN, "Stale shared memory file %s found, removing and retrying", shm_path); if (unlink(shm_path) == -1 && errno != ENOENT) { logger(LOG_ERROR, "Failed to unlink stale shared memory file: %s", strerror(errno)); return -1; } - logger(LOG_WARN, "Removed stale shared memory file %s", shm_path); fd = open(shm_path, O_CREAT | O_RDWR | O_EXCL, 0600); } if (fd == -1) { @@ -100,51 +74,46 @@ int status_init(void) { return -1; } - if (flock(fd, LOCK_EX | LOCK_NB) == -1) { - err = errno; - close(fd); - unlink(shm_path); - logger(LOG_ERROR, "Failed to lock shared memory file: %s", strerror(err)); - return -1; - } - + /* Set size of shared memory */ if (ftruncate(fd, 
sizeof(status_shared_t)) == -1) { - err = errno; + logger(LOG_ERROR, "Failed to set shared memory size: %s", strerror(errno)); close(fd); unlink(shm_path); - logger(LOG_ERROR, "Failed to set shared memory size: %s", strerror(err)); return -1; } - /* Stage everything on a local pointer; only publish to status_shared after - * the struct is fully usable. This keeps logger() (which gates on - * status_shared != NULL and may take log_mutex) from observing a - * partially-initialized region from any failure path. */ - shared = mmap(NULL, sizeof(status_shared_t), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (shared == MAP_FAILED) { - err = errno; + /* Map shared memory. + * logger() probes status_shared with a NULL check, not a MAP_FAILED check, + * so we must reset to NULL before logging or any failure path that calls + * logger() will dereference (void*)-1. */ + void *mapped = mmap(NULL, sizeof(status_shared_t), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (mapped == MAP_FAILED) { + int err = errno; + status_shared = NULL; + logger(LOG_ERROR, "Failed to map shared memory: %s", strerror(err)); close(fd); unlink(shm_path); - logger(LOG_ERROR, "Failed to map shared memory: %s", strerror(err)); return -1; } + status_shared = mapped; - memset(shared, 0, sizeof(status_shared_t)); - shared->server_start_time = get_realtime_ms(); - shared->current_log_level = config.verbosity; - shared->event_counter = 0; + /* Close file descriptor immediately after mmap() + * Per POSIX: "closing the file descriptor does not unmap the region" + * This is best practice and avoids fd management issues after fork() */ + close(fd); + + /* Initialize shared memory structure */ + memset(status_shared, 0, sizeof(status_shared_t)); + status_shared->server_start_time = get_realtime_ms(); + status_shared->current_log_level = config.verbosity; + status_shared->event_counter = 0; + + /* Initialize pipe fds to -1 (invalid) */ for (int i = 0; i < STATUS_MAX_WORKERS; i++) { - 
shared->worker_notification_pipe_read_fds[i] = -1; - shared->worker_notification_pipes[i] = -1; + status_shared->worker_notification_pipe_read_fds[i] = -1; + status_shared->worker_notification_pipes[i] = -1; } - pthread_mutexattr_t mutex_attr; - pthread_mutexattr_init(&mutex_attr); - pthread_mutexattr_setpshared(&mutex_attr, PTHREAD_PROCESS_SHARED); - pthread_mutex_init(&shared->log_mutex, &mutex_attr); - pthread_mutex_init(&shared->clients_mutex, &mutex_attr); - pthread_mutexattr_destroy(&mutex_attr); - /* Pre-create notification pipes for all possible workers (STATUS_MAX_WORKERS) * This is done BEFORE fork so all processes inherit the same pipe fds. * Pre-creating all pipes allows future config reload to change worker count @@ -152,33 +121,39 @@ int status_init(void) { for (int i = 0; i < STATUS_MAX_WORKERS; i++) { int pipe_fds[2]; if (pipe(pipe_fds) == -1) { - err = errno; + logger(LOG_ERROR, "Failed to create notification pipe for worker %d: %s", i, strerror(errno)); + /* Clean up already created pipes */ for (int j = 0; j < i; j++) { - if (shared->worker_notification_pipe_read_fds[j] != -1) - close(shared->worker_notification_pipe_read_fds[j]); - if (shared->worker_notification_pipes[j] != -1) - close(shared->worker_notification_pipes[j]); + if (status_shared->worker_notification_pipe_read_fds[j] != -1) + close(status_shared->worker_notification_pipe_read_fds[j]); + if (status_shared->worker_notification_pipes[j] != -1) + close(status_shared->worker_notification_pipes[j]); } - pthread_mutex_destroy(&shared->log_mutex); - pthread_mutex_destroy(&shared->clients_mutex); - munmap(shared, sizeof(status_shared_t)); - close(fd); + munmap(status_shared, sizeof(status_shared_t)); + status_shared = NULL; unlink(shm_path); - logger(LOG_ERROR, "Failed to create notification pipe for worker %d: %s", i, strerror(err)); return -1; } + /* Set read end to non-blocking mode */ int flags = fcntl(pipe_fds[0], F_GETFL, 0); fcntl(pipe_fds[0], F_SETFL, flags | O_NONBLOCK); - 
shared->worker_notification_pipe_read_fds[i] = pipe_fds[0]; - shared->worker_notification_pipes[i] = pipe_fds[1]; + /* Store both ends in shared memory + * Read ends will be used by each worker after fork + * Write ends are accessible by all workers for cross-worker notification + */ + status_shared->worker_notification_pipe_read_fds[i] = pipe_fds[0]; + status_shared->worker_notification_pipes[i] = pipe_fds[1]; } - /* Publish. Keep fd open: it carries the liveness flock and will be closed - * by status_cleanup(). */ - shm_lock_fd = fd; - status_shared = shared; + /* Initialize mutexes for multi-process safety */ + pthread_mutexattr_t mutex_attr; + pthread_mutexattr_init(&mutex_attr); + pthread_mutexattr_setpshared(&mutex_attr, PTHREAD_PROCESS_SHARED); + pthread_mutex_init(&status_shared->log_mutex, &mutex_attr); + pthread_mutex_init(&status_shared->clients_mutex, &mutex_attr); + pthread_mutexattr_destroy(&mutex_attr); logger(LOG_INFO, "Status tracking initialized"); return 0; @@ -237,14 +212,6 @@ void status_cleanup(void) { status_shared = NULL; } - /* Close this process's copy of the liveness lock fd. The flock is per - * open-file-description, so the lock survives until the last process - * closes its inherited copy. */ - if (shm_lock_fd != -1) { - close(shm_lock_fd); - shm_lock_fd = -1; - } - /* Only the final cleanup process unlinks shared memory file * unlink() removes the shared memory file from the filesystem */ if (is_final_cleanup) {