Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ JobAcctGatherFrequency=30
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost=slurm-db
AccountingStoragePort=6819
PrologFlags=Contain
NodeName=slurm-node-1 NodeAddr=slurm-node-1 CPUs=3 RealMemory=1000 State=UNKNOWN
NodeName=slurm-node-2 NodeAddr=slurm-node-2 CPUs=3 RealMemory=1000 State=UNKNOWN
NodeName=slurm-node-3 NodeAddr=slurm-node-3 CPUs=3 RealMemory=1000 State=UNKNOWN
Expand Down
115 changes: 115 additions & 0 deletions doc/slurm_plugin.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
Spindle Slurm plugin
====================

The Spindle Slurm plugin integrates Spindle into Slurm through the
SPANK interface as an alternative launch mechanism to the srun wrapper.
It adds the ability to launch job steps using `srun --spindle`.

## Building and configuring the plugin

Configure Spindle with `--with-rm=slurm-plugin` and `--enable-slurm-plugin`:

```bash
./configure --with-rm=slurm-plugin --enable-slurm-plugin [--with-slurm-dir=/path/to/slurm] ...
make
make install
```

Refer to `INSTALL` for more details on configuring Spindle.

After installation of Spindle, the plugin is installed at
`$PREFIX/lib/libspindleslurm.so`. It is registered with Slurm by adding the
following line to `/etc/slurm/plugstack.conf`:

```
required /path/to/spindle/lib/libspindleslurm.so
```

## Session launch modes

The manner in which Spindle sessions are started varies depending on
the configuration of Spindle and of Slurm.

When starting a session, the plugin must arrange for Spindle to start
on each compute node before any step runs within the allocation.
The most straightforward way to do this is to configure the cluster
to run job prologs at allocation time. If your `slurm.conf` includes
`PrologFlags=Alloc` (or another flag that implies it: `Contain`,
`RunInJob`, `X11`, `ForceRequeueOnFail`, or `NoHold`), then sessions
will be started on each node of the allocation at the time the allocation
is made.

If `PrologFlags=Alloc` or a related setting is *not* used, one of two
mechanisms is used to start the Spindle session on every node:

**RSH launch**: Spindle can use RSH/SSH to launch daemons from the
frontend (FE) process. To use the RSH launch mode, the cluster must be configured
such that passwordless ssh can be used to run commands on every compute
node within the allocation without any interactive user input.
This mode is enabled by configuring Spindle with:

```bash
./configure --with-rm=slurm-plugin --enable-slurm-plugin --with-rsh-launch [--with-rsh-cmd=/usr/bin/ssh] ...
```

**Dummy srun fallback**: If neither `PrologFlags=Alloc` nor RSH launch is available,
Spindle will fall back on using a dummy `srun` invocation to force the prolog
to run on every compute node of the allocation. Note that this has the side-effect
of consuming step 0, so that the user's first step will instead be numbered 1.

## Using Spindle through the Slurm plugin

### Per-step mode: `--spindle`

Add `--spindle` to any `srun` command to use Spindle for that step.
Spindle daemons start before the application runs and shut down when
the step finishes.

```bash
srun --spindle ./my_application
```

Additional arguments can be passed to Spindle as an optional value of the argument `--spindle`:

```bash
srun --spindle="--level=low" ./my_application
```

### Session mode: `--spindle-session`

Session mode shares a Spindle session across multiple steps.
The use of sessions in the Slurm plugin differs from their use with
the other launchers. Unlike the other launchers, sessions are *not*
started with `spindle --start-session`. Rather, the plugin adds an
additional option, `--spindle-session`, to `salloc` and `sbatch`.

To use a session, include `--spindle-session` when creating the allocation:

```bash
salloc --spindle-session ...
```

Then run steps with `--spindle`:

```bash
srun --spindle ./app1
srun --spindle ./app2
srun --spindle ./app3
```

All steps within the allocation will run in the same Spindle session.
When the allocation exits, the session will terminate automatically.

Sessions can be used with an `sbatch` script as shown below:

```bash
#!/bin/bash
#SBATCH --spindle-session
#SBATCH -N 4
#SBATCH -n 4

srun --spindle ./app1
srun --spindle ./app2
srun --spindle ./app3
```

33 changes: 25 additions & 8 deletions src/slurm_plugin/plugin_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -254,11 +254,11 @@ int isFEHost(char **hostlist, unsigned int num_hosts)
int feresult = -1;

for (i = 0; i < num_hosts; i++) {
if (!last_host || strcmp(hostlist[i], last_host) == 1) {
if (!last_host || strcmp(hostlist[i], last_host) > 0) {
last_host = hostlist[i];
}
}
sdprintf(2, "last_host = %s\n", last_host ? last_host : NULL);
sdprintf(2, "last_host = %s\n", last_host ? last_host : "(null)");
if (!last_host) {
error = errno;
sdprintf(1, "ERROR: Could not get current system's hostname: %s\n", strerror(error));
Expand Down Expand Up @@ -517,6 +517,15 @@ int signalSpankSessionEnd(spindle_args_t *params)

/* Path of the unique file currently recorded for this process, or NULL when
 * none is recorded. The string is released with free() by
 * cleanup_unique_file(). */
char *unique_file = NULL;

/**
 * Remove the recorded unique file (if any) and release its path string.
 *
 * Unlinks the file named by unique_file, frees the string, and resets the
 * pointer to NULL, so calling this again afterwards is a no-op. The return
 * value of unlink() is not checked.
 */
void cleanup_unique_file(void)
{
   /* Nothing recorded: unlink(NULL) must be avoided. */
   if (!unique_file)
      return;

   unlink(unique_file);
   free(unique_file);
   unique_file = NULL;
}

#define UNIQUE_FILE_NAME "spindle_unique"

int isBEProc(spindle_args_t *params, unsigned int exit_phase)
Expand Down Expand Up @@ -689,8 +698,10 @@ void push_env(spank_t spank, saved_env_t **env)
e->new_spindledebug = readSpankEnv(spank, "SPINDLE_DEBUG");
e->old_spindledebug = getenv("SPINDLE_DEBUG");

if (e->new_pwd)
chdir(e->new_pwd);
if (e->new_pwd) {
if (chdir(e->new_pwd) == -1)
sdprintf(1, "WARNING: Could not chdir to %s: %s\n", e->new_pwd, strerror(errno));
}

if (e->new_home)
setenv("HOME", e->new_home, 1);
Expand Down Expand Up @@ -729,8 +740,10 @@ void pop_env(saved_env_t *env)
else
unsetenv("SPINDLE_DEBUG");

if (env->old_pwd)
chdir(env->old_pwd);
if (env->old_pwd) {
if (chdir(env->old_pwd) == -1)
sdprintf(1, "WARNING: Could not chdir to %s: %s\n", env->old_pwd, strerror(errno));
}

if (env->new_home)
free(env->new_home);
Expand Down Expand Up @@ -817,7 +830,7 @@ int dropPrivilegeAndRun(dpr_function_t func, uid_t uid, void *input, char **outp
exit(-1);
}
if (output_len) {
result = safe_write(pipe_fds[1], output_str, output_len+1);
result = safe_write(pipe_fds[1], child_output_str, output_len+1);
if (result != output_len+1) {
error = errno;
fprintf(stderr, "Spindle error. Could not write result string to pipe: %s\n", strerror(error));
Expand Down Expand Up @@ -991,7 +1004,11 @@ pid_t grandchild_fork()
int result, fork_result = -1;

pipe_fds[0] = pipe_fds[1] = -1;
pipe(pipe_fds);
result = pipe(pipe_fds);
if (result == -1) {
sdprintf(1, "ERROR: pipe() failed in grandchild_fork. Aborting spindle\n");
return -1;
}

child_pid = fork();
if (child_pid == -1) {
Expand Down
1 change: 1 addition & 0 deletions src/slurm_plugin/plugin_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ char **getHostsParse(unsigned int num_hosts, const char *shortlist);

int isFEHost(char **hostlist, unsigned int num_hosts);
extern char *unique_file;
void cleanup_unique_file();
int isBEProc(spindle_args_t *params, unsigned int exit_phase);

int doesFEExitSocketExist(spindle_args_t *params);
Expand Down
70 changes: 57 additions & 13 deletions src/slurm_plugin/slurm_plugin.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Place, Suite 330, Boston, MA 02111-1307 USA
#include <stdio.h>
#include <fcntl.h>

#include <slurm/slurm.h>
#include <slurm/spank.h>

#include "spindle_launch.h"
Expand Down Expand Up @@ -102,6 +103,7 @@ static __thread spank_t current_spank;
static const char *user_options = NULL;
static int enable_spindle = 0;
static int start_session = 0;
static int prolog_alloc_mode = 0;

extern char *parse_location(char *loc, number_t number);

Expand Down Expand Up @@ -155,7 +157,14 @@ static int should_use_session(spank_t spank) {
if (session_env)
return 1;
err = spank_option_getopt(spank, &session_option, NULL);
return (err == ESPANK_SUCCESS);
return (err == ESPANK_SUCCESS);
}

/* In the allocator, we are running in the same process that
* handled the command line arguments and can check the
* flag directly. */
if (context == S_CTX_ALLOCATOR) {
return start_session;
}

return 0;
Expand All @@ -167,6 +176,23 @@ int slurm_spank_init(spank_t spank, int ac, char *argv[]) {
if (context == S_CTX_ALLOCATOR) {
spank_option_register(spank, &session_option);
}

#if defined(PROLOG_FLAG_ALLOC)
/* Check whether Slurm is configured to run the prolog at
* allocation time (PROLOG_FLAG_ALLOC), or the default mode
* where the prolog runs on the first step. */
if (!prolog_alloc_mode) {
slurm_conf_t *conf = NULL;
if (slurm_load_ctl_conf(0, &conf) == SLURM_SUCCESS) {
if (conf->prolog_flags & PROLOG_FLAG_ALLOC)
prolog_alloc_mode = 1;
slurm_free_ctl_conf(conf);
} else {
sdprintf(1, "Could not read Slurm config, falling back to non-prolog launch.\n");
}
}
#endif

return 0;
}

Expand All @@ -185,6 +211,17 @@ int slurm_spank_init_post_opt(spank_t spank, int ac, char *argv[]) {
if (start_session) {
setenv(SPANK_SPINDLE_USE_SESSION, "1", 1);
}

/* With PrologFlags=Alloc, forward env vars to job control here.
Without PrologFlags=Alloc, this forwarding happens later
in local context (srun). */
if (prolog_alloc_mode && start_session) {
int result = forward_environment_to_job_control(spank);
if (result == -1) {
slurm_error("ERROR: Spindle plugin error. Unable to forward environment variables to job control.\n");
return result;
}
}
}
return 0;
}
Expand Down Expand Up @@ -313,10 +350,13 @@ int slurm_spank_local_user_init(spank_t spank, int ac, char *argv[])
goto done;
}

use_session = should_use_session(spank);
use_session = should_use_session(spank);
if (!use_session)
goto done;

if (prolog_alloc_mode)
goto done;

result = process_spindle_args(spank, ac, argv, &params, NULL, NULL, use_session);
if (result == -1) {
slurm_error("ERROR: Spindle plugin error. Could not process spindle args in local user init.\n");
Expand Down Expand Up @@ -383,15 +423,18 @@ int slurm_spank_job_prolog(spank_t spank, int ac, char *argv[]) {
return 0;


envVal = getenv("SPANK_SPINDLE_RSHLAUNCH");
if (envVal && strcmp(envVal, "1") == 0)
return 0;
if (!prolog_alloc_mode) {
envVal = getenv("SPANK_SPINDLE_RSHLAUNCH");
if (envVal && strcmp(envVal, "1") == 0)
return 0;
}

// The prolog starts in the user's home directory.
// Change to $SLURM_JOB_WORK_DIR so logs go to right place.
work_dir = getenv("SLURM_JOB_WORK_DIR");
if (work_dir) {
chdir(work_dir);
if (chdir(work_dir) == -1)
sdprintf(1, "WARNING: Could not chdir to %s: %s\n", work_dir, strerror(errno));
}

err = spank_get_item(spank, S_JOB_UID, &userid);
Expand Down Expand Up @@ -504,11 +547,13 @@ int slurm_spank_task_init(spank_t spank, int site_argc, char *site_argv[])
}

if (params.opts & OPT_OFF) {
pop_env(env);
return 0;
}

/* When using a session without RSHLAUNCH, handle start in job prolog, not here. */
if ((!use_session) || (params.opts & OPT_RSHLAUNCH)) {
/* When using a session without RSHLAUNCH, handle start in job prolog, not here.
With PrologFlags=Alloc, session+RSHLAUNCH is also handled in the prolog. */
if ((!use_session) || ((params.opts & OPT_RSHLAUNCH) && !prolog_alloc_mode)) {
start_params.spank = spank;
start_params.site_argc = site_argc;
start_params.site_argv = site_argv;
Expand Down Expand Up @@ -559,8 +604,9 @@ static int handleStart(void *params, char **output_str)
return 0;
}

// Only initialize a session once
if (use_session && (args.opts & OPT_RSHLAUNCH)) {
/* Only initialize a session once. In prolog context (S_CTX_JOB_SCRIPT),
* there is no step yet, so skip the step ID check. */
if (use_session && (args.opts & OPT_RSHLAUNCH) && spank_context() != S_CTX_JOB_SCRIPT) {
err = get_stepid(spank, &stepid);
if (err != ESPANK_SUCCESS) {
slurm_error("ERROR: Spindle plugin error. Could not get step id.");
Expand Down Expand Up @@ -1169,9 +1215,7 @@ static int launchBE(spank_t spank, spindle_args_t *params)
else
sdprintf(1, "spindleRunBE completed. Session finishing.\n");

if (unique_file) unlink(unique_file);
free(unique_file);
unique_file = NULL;
cleanup_unique_file();

exit(result);

Expand Down
Loading