diff --git a/containers/spindle-slurm-ubuntu/testing-plugin/conf/slurm.conf b/containers/spindle-slurm-ubuntu/testing-plugin/conf/slurm.conf index abf060d5..e4298157 100644 --- a/containers/spindle-slurm-ubuntu/testing-plugin/conf/slurm.conf +++ b/containers/spindle-slurm-ubuntu/testing-plugin/conf/slurm.conf @@ -34,6 +34,7 @@ JobAcctGatherFrequency=30 AccountingStorageType=accounting_storage/slurmdbd AccountingStorageHost=slurm-db AccountingStoragePort=6819 +PrologFlags=Contain NodeName=slurm-node-1 NodeAddr=slurm-node-1 CPUs=3 RealMemory=1000 State=UNKNOWN NodeName=slurm-node-2 NodeAddr=slurm-node-2 CPUs=3 RealMemory=1000 State=UNKNOWN NodeName=slurm-node-3 NodeAddr=slurm-node-3 CPUs=3 RealMemory=1000 State=UNKNOWN diff --git a/doc/slurm_plugin.md b/doc/slurm_plugin.md new file mode 100644 index 00000000..ab8765c1 --- /dev/null +++ b/doc/slurm_plugin.md @@ -0,0 +1,115 @@ +Spindle Slurm plugin +==================== + +The Spindle Slurm plugin integrates Spindle into Slurm through the +SPANK interface as an alternative launch mechanism to the srun wrapper. +It adds the ability to launch job steps using `srun --spindle`. + +## Building and configuring the plugin + +Configure Spindle with `--enable-slurm-plugin`: + +```bash +./configure --with-rm=slurm-plugin --enable-slurm-plugin [--with-slurm-dir=/path/to/slurm] ... +make +make install +``` + +Refer to `INSTALL` for more details on configuring Spindle. + +After installation of Spindle, the plugin is installed at +`$PREFIX/lib/libspindleslurm.so`. It is registered with Slurm by adding the +following line to `/etc/slurm/plugstack.conf`: + +``` +required /path/to/spindle/lib/libspindleslurm.so +``` + +## Session launch modes + +The manner in which Spindle sessions are started varies depending on +the configuration of Spindle and of Slurm. + +When starting a session, the plugin must arrange for Spindle to start +on each compute node before any step runs within the allocation. 
+The most straightforward way to do this is to configure the cluster +to run job prologs at allocation time. If your `slurm.conf` includes +`PrologFlags=Alloc` (or another flag that implies it: `Contain`, +`RunInJob`, `X11`, `ForceRequeueOnFail`, or `NoHold`), then sessions +will be started on each node of the allocation at the time the allocation +is made. + +If `PrologFlags=Alloc` or a related setting is *not* used, one of two +mechanisms is used to start the job on every node: + +**RSH launch**: Spindle can use RSH/SSH to launch daemons from the +frontend (FE) process. To use the RSH launch mode, the cluster must be configured +such that passwordless ssh can be used to run commands on every compute +node within the allocation without any interactive user input. +This mode is enabled by configuring Spindle with: + +```bash +./configure --with-rm=slurm-plugin --enable-slurm-plugin --with-rsh-launch [--with-rsh-cmd=/usr/bin/ssh] ... +``` + +**Dummy srun fallback**: If neither `PrologFlags=Alloc` nor RSH launch is available, +Spindle will fall back on using a dummy `srun` invocation to force the prolog +to run on every compute node of the allocation. Note that this has the side-effect +of consuming step 0, so that the user's first step will instead be numbered 1. + +## Using Spindle through the Slurm plugin + +### Per-step mode: `--spindle` + +Add `--spindle` to any `srun` command to use Spindle for that step. +Spindle daemons start before the application runs and shut down when +the step finishes. + +```bash +srun --spindle ./my_application +``` + +Additional arguments can be passed to Spindle as an optional value of the argument `--spindle`: + +```bash +srun --spindle="--level=low" ./my_application +``` + +### Session mode: `--spindle-session` + +Session mode shares a Spindle session across multiple steps. +The use of sessions in the Slurm plugin differs from its use with +the other launchers. 
Unlike the other launchers, sessions are *not* +started with `spindle --start-session`. Rather, an additional argument +`--spindle-session` is added to `salloc` and `sbatch`. + +To use a session, include `--spindle-session` when creating the allocation: + +```bash +salloc --spindle-session ... +``` + +Then run steps with `--spindle`: + +```bash +srun --spindle ./app1 +srun --spindle ./app2 +srun --spindle ./app3 +``` + +All steps within the allocation will run in the same Spindle session. +When the allocation exits, the session will terminate automatically. + +Sessions can be used with an `sbatch` script as shown below: + +```bash +#!/bin/bash +#SBATCH --spindle-session +#SBATCH -N 4 +#SBATCH -n 4 + +srun --spindle ./app1 +srun --spindle ./app2 +srun --spindle ./app3 +``` + diff --git a/src/slurm_plugin/plugin_utils.c b/src/slurm_plugin/plugin_utils.c index 9a00e111..e9bb1ad4 100644 --- a/src/slurm_plugin/plugin_utils.c +++ b/src/slurm_plugin/plugin_utils.c @@ -254,11 +254,11 @@ int isFEHost(char **hostlist, unsigned int num_hosts) int feresult = -1; for (i = 0; i < num_hosts; i++) { - if (!last_host || strcmp(hostlist[i], last_host) == 1) { + if (!last_host || strcmp(hostlist[i], last_host) > 0) { last_host = hostlist[i]; } } - sdprintf(2, "last_host = %s\n", last_host ? last_host : NULL); + sdprintf(2, "last_host = %s\n", last_host ? 
last_host : "(null)"); if (!last_host) { error = errno; sdprintf(1, "ERROR: Could not get current system's hostname: %s\n", strerror(error)); @@ -517,6 +517,15 @@ int signalSpankSessionEnd(spindle_args_t *params) char *unique_file = NULL; +void cleanup_unique_file() +{ + if (unique_file) { + unlink(unique_file); + free(unique_file); + unique_file = NULL; + } +} + #define UNIQUE_FILE_NAME "spindle_unique" int isBEProc(spindle_args_t *params, unsigned int exit_phase) @@ -689,8 +698,10 @@ void push_env(spank_t spank, saved_env_t **env) e->new_spindledebug = readSpankEnv(spank, "SPINDLE_DEBUG"); e->old_spindledebug = getenv("SPINDLE_DEBUG"); - if (e->new_pwd) - chdir(e->new_pwd); + if (e->new_pwd) { + if (chdir(e->new_pwd) == -1) + sdprintf(1, "WARNING: Could not chdir to %s: %s\n", e->new_pwd, strerror(errno)); + } if (e->new_home) setenv("HOME", e->new_home, 1); @@ -729,8 +740,10 @@ void pop_env(saved_env_t *env) else unsetenv("SPINDLE_DEBUG"); - if (env->old_pwd) - chdir(env->old_pwd); + if (env->old_pwd) { + if (chdir(env->old_pwd) == -1) + sdprintf(1, "WARNING: Could not chdir to %s: %s\n", env->old_pwd, strerror(errno)); + } if (env->new_home) free(env->new_home); @@ -817,7 +830,7 @@ int dropPrivilegeAndRun(dpr_function_t func, uid_t uid, void *input, char **outp exit(-1); } if (output_len) { - result = safe_write(pipe_fds[1], output_str, output_len+1); + result = safe_write(pipe_fds[1], child_output_str, output_len+1); if (result != output_len+1) { error = errno; fprintf(stderr, "Spindle error. Could not write result string to pipe: %s\n", strerror(error)); @@ -991,7 +1004,11 @@ pid_t grandchild_fork() int result, fork_result = -1; pipe_fds[0] = pipe_fds[1] = -1; - pipe(pipe_fds); + result = pipe(pipe_fds); + if (result == -1) { + sdprintf(1, "ERROR: pipe() failed in grandchild_fork. 
Aborting spindle\n"); + return -1; + } child_pid = fork(); if (child_pid == -1) { diff --git a/src/slurm_plugin/plugin_utils.h b/src/slurm_plugin/plugin_utils.h index 0ac2c8c8..a29b9805 100644 --- a/src/slurm_plugin/plugin_utils.h +++ b/src/slurm_plugin/plugin_utils.h @@ -45,6 +45,7 @@ char **getHostsParse(unsigned int num_hosts, const char *shortlist); int isFEHost(char **hostlist, unsigned int num_hosts); extern char *unique_file; +void cleanup_unique_file(); int isBEProc(spindle_args_t *params, unsigned int exit_phase); int doesFEExitSocketExist(spindle_args_t *params); diff --git a/src/slurm_plugin/slurm_plugin.c b/src/slurm_plugin/slurm_plugin.c index f2fedfeb..50226d7e 100644 --- a/src/slurm_plugin/slurm_plugin.c +++ b/src/slurm_plugin/slurm_plugin.c @@ -27,6 +27,7 @@ Place, Suite 330, Boston, MA 02111-1307 USA #include #include +#include #include #include "spindle_launch.h" @@ -102,6 +103,7 @@ static __thread spank_t current_spank; static const char *user_options = NULL; static int enable_spindle = 0; static int start_session = 0; +static int prolog_alloc_mode = 0; extern char *parse_location(char *loc, number_t number); @@ -155,7 +157,14 @@ static int should_use_session(spank_t spank) { if (session_env) return 1; err = spank_option_getopt(spank, &session_option, NULL); - return (err == ESPANK_SUCCESS); + return (err == ESPANK_SUCCESS); + } + + /* In the allocator, we are running in the same process that + * handled the command line arguments and can check the + * flag directly. */ + if (context == S_CTX_ALLOCATOR) { + return start_session; } return 0; @@ -167,6 +176,23 @@ int slurm_spank_init(spank_t spank, int ac, char *argv[]) { if (context == S_CTX_ALLOCATOR) { spank_option_register(spank, &session_option); } + +#if defined(PROLOG_FLAG_ALLOC) + /* Check whether Slurm is configured to run the prolog at + * allocation time (PROLOG_FLAG_ALLOC), or the default mode + * where the prolog runs on the first step. 
*/ + if (!prolog_alloc_mode) { + slurm_conf_t *conf = NULL; + if (slurm_load_ctl_conf(0, &conf) == SLURM_SUCCESS) { + if (conf->prolog_flags & PROLOG_FLAG_ALLOC) + prolog_alloc_mode = 1; + slurm_free_ctl_conf(conf); + } else { + sdprintf(1, "Could not read Slurm config, falling back to non-prolog launch.\n"); + } + } +#endif + return 0; } @@ -185,6 +211,17 @@ int slurm_spank_init_post_opt(spank_t spank, int ac, char *argv[]) { if (start_session) { setenv(SPANK_SPINDLE_USE_SESSION, "1", 1); } + + /* With PrologFlags=Alloc, forward env vars to job control here so the prolog can read + them. Without PrologFlags=Alloc, this forwarding happens later + in local context (srun). */ + if (prolog_alloc_mode && start_session) { + int result = forward_environment_to_job_control(spank); + if (result == -1) { + slurm_error("ERROR: Spindle plugin error. Unable to forward environment variables to job control.\n"); + return result; + } + } } return 0; } @@ -313,10 +350,13 @@ int slurm_spank_local_user_init(spank_t spank, int ac, char *argv[]) goto done; } - use_session = should_use_session(spank); + use_session = should_use_session(spank); if (!use_session) goto done; + if (prolog_alloc_mode) + goto done; + result = process_spindle_args(spank, ac, argv, &params, NULL, NULL, use_session); if (result == -1) { slurm_error("ERROR: Spindle plugin error. Could not process spindle args in local user init.\n"); @@ -383,15 +423,18 @@ int slurm_spank_job_prolog(spank_t spank, int ac, char *argv[]) { return 0; - envVal = getenv("SPANK_SPINDLE_RSHLAUNCH"); - if (envVal && strcmp(envVal, "1") == 0) - return 0; + if (!prolog_alloc_mode) { + envVal = getenv("SPANK_SPINDLE_RSHLAUNCH"); + if (envVal && strcmp(envVal, "1") == 0) + return 0; + } // The prolog starts in the user's home directory. // Change to $SLURM_JOB_WORK_DIR so logs go to right place. 
work_dir = getenv("SLURM_JOB_WORK_DIR"); if (work_dir) { - chdir(work_dir); + if (chdir(work_dir) == -1) + sdprintf(1, "WARNING: Could not chdir to %s: %s\n", work_dir, strerror(errno)); } err = spank_get_item(spank, S_JOB_UID, &userid); @@ -504,11 +547,13 @@ int slurm_spank_task_init(spank_t spank, int site_argc, char *site_argv[]) } if (params.opts & OPT_OFF) { + pop_env(env); return 0; } - /* When using a session without RSHLAUNCH, handle start in job prolog, not here. */ - if ((!use_session) || (params.opts & OPT_RSHLAUNCH)) { + /* When using a session without RSHLAUNCH, handle start in job prolog, not here. + With PrologFlags=Alloc, session+RSHLAUNCH is also handled in the prolog. */ + if ((!use_session) || ((params.opts & OPT_RSHLAUNCH) && !prolog_alloc_mode)) { start_params.spank = spank; start_params.site_argc = site_argc; start_params.site_argv = site_argv; @@ -559,8 +604,9 @@ static int handleStart(void *params, char **output_str) return 0; } - // Only initialize a session once - if (use_session && (args.opts & OPT_RSHLAUNCH)) { + /* Only initialize a session once. In prolog context (S_CTX_JOB_SCRIPT), + * there is no step yet, so skip the step ID check. */ + if (use_session && (args.opts & OPT_RSHLAUNCH) && spank_context() != S_CTX_JOB_SCRIPT) { err = get_stepid(spank, &stepid); if (err != ESPANK_SUCCESS) { slurm_error("ERROR: Spindle plugin error. Could not get step id."); @@ -1169,9 +1215,7 @@ static int launchBE(spank_t spank, spindle_args_t *params) else sdprintf(1, "spindleRunBE completed. Session finishing.\n"); - if (unique_file) unlink(unique_file); - free(unique_file); - unique_file = NULL; + cleanup_unique_file(); exit(result);