diff --git a/automation/src/test/resources/reference.conf b/automation/src/test/resources/reference.conf index 24c9a42a8e..110051c3b8 100644 --- a/automation/src/test/resources/reference.conf +++ b/automation/src/test/resources/reference.conf @@ -1,9 +1,9 @@ leonardo { rImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-r:2.2.7" pythonImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:1.1.6" - hailImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-hail:1.1.12" + hailImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-hail:1.1.14" gatkImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-gatk:2.3.9" - aouImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.15" + aouImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.16" baseImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-base:1.1.4" gcrWelderUri = "us.gcr.io/broad-dsp-gcr-public/welder-server" dockerHubWelderUri = "broadinstitute/welder-server" diff --git a/http/src/main/resources/init-resources/init-actions-aou-dataproc-21.sh b/http/src/main/resources/init-resources/init-actions-aou-dataproc-21.sh new file mode 100644 index 0000000000..2296d8d411 --- /dev/null +++ b/http/src/main/resources/init-resources/init-actions-aou-dataproc-21.sh @@ -0,0 +1,538 @@ +#!/usr/bin/env bash + +set -e -x + +# AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod + +# This is the very first script as we started on Dataproc +# +# This init script instantiates the tool (e.g. Jupyter) docker images on the Dataproc cluster master node. +# Adapted from https://github.com/GoogleCloudPlatform/dataproc-initialization-actions/blob/master/datalab/datalab.sh +# + +# +# Functions +# + +# Retry a command up to a specific number of times until it exits successfully, +# with exponential back off. +# +# $ retry 5 echo "Hello" +# Hello +# +# $ retry 5 false +# Retry 1/5 exited 1, retrying in 2 seconds... +# Retry 2/5 exited 1, retrying in 4 seconds... 
# Retry 3/5 exited 1, retrying in 8 seconds...
# Retry 4/5 exited 1, retrying in 16 seconds...
# Retry 5/5 exited 1, no more retries left.
#
# Arguments:
#   $1   - maximum number of attempts
#   $2.. - the command (and its arguments) to run
# Returns:
#   0 as soon as an attempt succeeds, 1 once every attempt has failed
#   (the command's own exit status is only reported through log).
function retry {
  local retries=$1
  shift

  # Loop state is local so that a caller's own $i is not clobbered and so the
  # names of the `exit` / `wait` builtins are not reused as global variables.
  local attempt status delay
  for ((attempt = 1; attempt <= retries; attempt++)); do
    # run with an 'or' so set -e doesn't abort the bash script on errors
    status=0
    "$@" || status=$?
    if ((status == 0)); then
      return 0
    fi
    if ((attempt == retries)); then
      log "Retry $attempt/$retries exited $status, no more retries left."
      break
    fi
    delay=$((2 ** attempt))
    log "Retry $attempt/$retries exited $status, retrying in $delay seconds..."
    sleep "$delay"
  done
  return 1
}

# Print all arguments as one message on stdout, prefixed with an ISO-8601
# style local timestamp: "[2025-05-06T13:58:25+0000]: message".
function log() {
  # "$*" joins the arguments into the single message string; "$@" embedded in a
  # larger quoted string is split across several words (ShellCheck SC2145).
  echo "[$(date +'%Y-%m-%dT%H:%M:%S%z')]: $*"
}

# Run `apt-get update` and treat warnings as failures.
# Returns 1 when the combined stdout/stderr contains a line starting with
# "W:" or "E:" (a failing apt-get injects a synthetic "E: update failed" line),
# 0 otherwise.
function betterAptGet() {
  if { apt-get update 2>&1 || echo E: update failed; } | grep -q '^[WE]:'; then
    return 1
  fi
  return 0
}

# Private helper shared by apply_user_script and apply_start_user_script:
# download a script to /etc on the VM, copy it into a container and make it
# executable there.
# Arguments:
#   $1 - script URI; gs://... is fetched with gsutil, anything else with curl
#   $2 - container name
#   $3 - directory inside the container that receives the script
function _stage_script_in_container() {
  local script_uri=$1
  local container=$2
  local target_dir=$3
  local script_name

  script_name=$(basename "${script_uri}")
  if [[ "${script_uri}" == 'gs://'* ]]; then
    gsutil cp "${script_uri}" /etc
  else
    # NOTE(review): curl is run without --fail, so an HTTP error page would be
    # saved and later executed as the script — confirm whether that is intended.
    curl "${script_uri}" -o "/etc/${script_name}"
  fi
  docker cp "/etc/${script_name}" "${container}:${target_dir}/${script_name}"
  retry 3 docker exec -u root "${container}" chmod +x "${target_dir}/${script_name}"
}

# Fetch $USER_SCRIPT_URI, run it as root inside a container, and upload its
# combined stdout/stderr (us_output.txt in the current directory) to
# $USER_SCRIPT_OUTPUT_URI with an x-goog-meta-passed header of true/false.
# Globals:   USER_SCRIPT_URI, USER_SCRIPT_OUTPUT_URI (read)
# Arguments: $1 - container name, $2 - directory inside the container
# Exits the whole init script with the user script's status when it fails.
function apply_user_script() {
  local CONTAINER_NAME=$1
  local TARGET_DIR=$2
  local script_name
  local exit_code=0

  log "Running user script $USER_SCRIPT_URI in $CONTAINER_NAME container..."
  script_name=$(basename "${USER_SCRIPT_URI}")
  _stage_script_in_container "${USER_SCRIPT_URI}" "${CONTAINER_NAME}" "${TARGET_DIR}"

  # Execute the user script as privileged to allow for deeper customization of VM behavior, e.g. installing
  # network egress throttling. As docker is not a security layer, it is assumed that a determined attacker
  # can gain full access to the VM already, so using this flag is not a significant escalation.
  # The '|| exit_code=$?' keeps set -e from aborting before the output is uploaded.
  docker exec --privileged -u root -e PIP_USER=false "${CONTAINER_NAME}" "${TARGET_DIR}/${script_name}" &> us_output.txt || exit_code=$?

  if ((exit_code != 0)); then
    log "User script failed with exit code $exit_code. Output is saved to $USER_SCRIPT_OUTPUT_URI."
    retry 3 gsutil -h "x-goog-meta-passed":"false" cp us_output.txt "${USER_SCRIPT_OUTPUT_URI}"
    exit "$exit_code"
  else
    retry 3 gsutil -h "x-goog-meta-passed":"true" cp us_output.txt "${USER_SCRIPT_OUTPUT_URI}"
  fi
}

# Same flow as apply_user_script, for the start user script: fetch
# $START_USER_SCRIPT_URI, run it as root inside a container, and upload its
# output (start_output.txt) to $START_USER_SCRIPT_OUTPUT_URI.
# Globals:   START_USER_SCRIPT_URI, START_USER_SCRIPT_OUTPUT_URI (read)
# Arguments: $1 - container name, $2 - directory inside the container
# Exits the whole init script with the start script's status when it fails.
function apply_start_user_script() {
  local CONTAINER_NAME=$1
  local TARGET_DIR=$2
  local script_name
  local exit_code=0

  log "Running start user script $START_USER_SCRIPT_URI in $CONTAINER_NAME container..."
  script_name=$(basename "${START_USER_SCRIPT_URI}")
  _stage_script_in_container "${START_USER_SCRIPT_URI}" "${CONTAINER_NAME}" "${TARGET_DIR}"

  # Keep in sync with startup.sh
  docker exec --privileged -u root -e PIP_USER=false "${CONTAINER_NAME}" "${TARGET_DIR}/${script_name}" &> start_output.txt || exit_code=$?
  if ((exit_code != 0)); then
    echo "User start script failed with exit code ${exit_code}. Output is saved to ${START_USER_SCRIPT_OUTPUT_URI}"
    retry 3 gsutil -h "x-goog-meta-passed":"false" cp start_output.txt "${START_USER_SCRIPT_OUTPUT_URI}"
    exit "$exit_code"
  else
    retry 3 gsutil -h "x-goog-meta-passed":"true" cp start_output.txt "${START_USER_SCRIPT_OUTPUT_URI}"
  fi
}

#
# Main
#

#
# Array for instrumentation
# UPDATE THIS IF YOU ADD MORE STEPS:
# currently the steps are:
# START init,
# .. after env setup
# .. after copying files from google and into docker
# .. after docker compose
# .. after welder start
# .. after hail and spark
# .. after nbextension install
# .. after server extension install
# .. after combined extension install
# .. after user script
# .. after lab extension install
# ..
after jupyter notebook start +# END +STEP_TIMINGS=($(date +%s)) +# temp workaround for https://github.com/docker/compose/issues/5930 +export CLOUDSDK_PYTHON=python3 + +# This identifies whether we are running on the master node (running the jupyter container). There does not seem to be any customization of the worker nodes +ROLE=$(/usr/share/google/get_metadata_value attributes/dataproc-role) + +# Only initialize tool and proxy docker containers on the master +if [[ "${ROLE}" == 'Master' ]]; then + JUPYTER_HOME=/etc/jupyter + JUPYTER_SCRIPTS=${JUPYTER_HOME}/scripts + KERNELSPEC_HOME=/usr/local/share/jupyter/kernels + + # Set variables + # Values like $(..) are populated by Leo when a cluster is created. + # See https://github.com/DataBiosphere/leonardo/blob/e46acfcb409b11198b1f12533cefea3f6c7fdafb/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/RuntimeTemplateValues.scala#L192 + # Avoid exporting variables unless they are needed by external scripts or docker-compose files. 
+ export CLOUD_SERVICE='DATAPROC' + # Needs to be in sync with terra-docker container + export JUPYTER_USER_HOME=$(jupyterHomeDirectory) + export CLUSTER_NAME=$(clusterName) + export RUNTIME_NAME=$(clusterName) + export GOOGLE_PROJECT=$(googleProject) + export STAGING_BUCKET=$(stagingBucketName) + export OWNER_EMAIL=$(loginHint) + export PET_SA_EMAIL=$(petSaEmail) + export JUPYTER_SERVER_NAME=$(jupyterServerName) + export RSTUDIO_SERVER_NAME=$(rstudioServerName) + export PROXY_SERVER_NAME=$(proxyServerName) + export WELDER_SERVER_NAME=$(welderServerName) + export CRYPTO_DETECTOR_SERVER_NAME=$(cryptoDetectorServerName) + export JUPYTER_DOCKER_IMAGE=$(jupyterDockerImage) + export RSTUDIO_DOCKER_IMAGE=$(rstudioDockerImage) + export PROXY_DOCKER_IMAGE=$(proxyDockerImage) + export WELDER_DOCKER_IMAGE=$(welderDockerImage) + export CRYPTO_DETECTOR_DOCKER_IMAGE=$(cryptoDetectorDockerImage) + export WELDER_ENABLED=$(welderEnabled) + export NOTEBOOKS_DIR=$(notebooksDir) + export MEM_LIMIT=$(memLimit) + export SHM_SIZE=$(shmSize) + export WELDER_MEM_LIMIT=$(welderMemLimit) + export PROXY_SERVER_HOST_NAME=$(proxyServerHostName) + export CERT_DIRECTORY='/certs' + export WORK_DIRECTORY='/work' + export DOCKER_COMPOSE_FILES_DIRECTORY='/etc' + PROXY_SITE_CONF=$(proxySiteConf) + export HOST_PROXY_SITE_CONF_FILE_PATH=${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${PROXY_SITE_CONF}` + if [ ! 
-z "$RSTUDIO_DOCKER_IMAGE" ] ; then + export SHOULD_BACKGROUND_SYNC="true" + else + export SHOULD_BACKGROUND_SYNC="false" + fi + + SERVER_CRT=$(proxyServerCrt) + SERVER_KEY=$(proxyServerKey) + ROOT_CA=$(rootCaPem) + JUPYTER_DOCKER_COMPOSE=$(jupyterDockerCompose) + RSTUDIO_DOCKER_COMPOSE=$(rstudioDockerCompose) + PROXY_DOCKER_COMPOSE=$(proxyDockerCompose) + WELDER_DOCKER_COMPOSE=$(welderDockerCompose) + PROXY_SITE_CONF=$(proxySiteConf) + JUPYTER_SERVER_EXTENSIONS=$(jupyterServerExtensions) + JUPYTER_NB_EXTENSIONS=$(jupyterNbExtensions) + JUPYTER_COMBINED_EXTENSIONS=$(jupyterCombinedExtensions) + JUPYTER_LAB_EXTENSIONS=$(jupyterLabExtensions) + USER_SCRIPT_URI=$(userScriptUri) + USER_SCRIPT_OUTPUT_URI=$(userScriptOutputUri) + START_USER_SCRIPT_URI=$(startUserScriptUri) + # Include a timestamp suffix to differentiate different startup logs across restarts. + START_USER_SCRIPT_OUTPUT_URI="$(startUserScriptOutputUri)" + JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI=$(jupyterNotebookFrontendConfigUri) + CUSTOM_ENV_VARS_CONFIG_URI=$(customEnvVarsConfigUri) + RSTUDIO_SCRIPTS=/etc/rstudio/scripts + RSTUDIO_USER_HOME=/home/rstudio + INIT_BUCKET_NAME=$(initBucketName) + + STEP_TIMINGS+=($(date +%s)) + + log 'Copying secrets from GCS...' + + mkdir /work + mkdir /certs + chmod a+rwx /work + + # Add the certificates from the bucket to the VM. 
They are used by the docker-compose file + gsutil cp ${SERVER_CRT} /certs + gsutil cp ${SERVER_KEY} /certs + gsutil cp ${ROOT_CA} /certs + gsutil cp gs://${INIT_BUCKET_NAME}/* ${DOCKER_COMPOSE_FILES_DIRECTORY} + + + # GCP connector is used by dataproc to connect with the staging bucket to read the logs + touch /hadoop_gcs_connector_metadata_cache + touch auth_openidc.conf + + + ## Note that the stack driver configuration is changing in later versions of Dataproc, see https://broadworkbench.atlassian.net/browse/IA-5023 + # Add stack driver configuration for welder + tee /etc/google-fluentd/config.d/welder.conf << END + + @type tail + format json + path /work/welder.log + pos_file /var/tmp/fluentd.welder.pos + read_from_head true + tag welder + +END + + # Add stack driver configuration for jupyter + tee /etc/google-fluentd/config.d/jupyter.conf << END + + @type tail + format none + path /work/jupyter.log + pos_file /var/tmp/fluentd.jupyter.pos + read_from_head true + tag jupyter + +END + + # Add stack driver configuration for user startup and shutdown scripts + tee /etc/google-fluentd/config.d/daemon.conf << END + + @type tail + format none + path /var/log/daemon.log + pos_file /var/tmp/fluentd.google.user.daemon.pos + read_from_head true + tag daemon + +END + + service google-fluentd reload + + # Install env var config + if [ ! -z ${CUSTOM_ENV_VARS_CONFIG_URI} ] ; then + log 'Copy custom env vars config...' + gsutil cp ${CUSTOM_ENV_VARS_CONFIG_URI} /var + fi + + + # If any image is hosted in a GAR registry (detected by regex) then + # authorize docker to interact with gcr.io. + # NOTE: GCR images are now hosted on GAR, but the file paths haven't changed, they automatically redirect. + if grep -qF "gcr.io" <<< "${JUPYTER_DOCKER_IMAGE}${RSTUDIO_DOCKER_IMAGE}${PROXY_DOCKER_IMAGE}${WELDER_DOCKER_IMAGE}" ; then + log 'Authorizing GCR/GAR...' + gcloud auth configure-docker + fi + + STEP_TIMINGS+=($(date +%s)) + + log 'Starting up the Jupyter docker...' 
+ + # Run docker-compose for each specified compose file. + # Note the `docker-compose pull` is retried to avoid intermittent network errors, but + # `docker-compose up` is not retried. + COMPOSE_FILES=(-f /etc/`basename ${PROXY_DOCKER_COMPOSE}`) + + cat /etc/`basename ${PROXY_DOCKER_COMPOSE}` + + if [ ! -z ${WELDER_DOCKER_IMAGE} ] && [ "${WELDER_ENABLED}" == "true" ] ; then + COMPOSE_FILES+=(-f /etc/`basename ${WELDER_DOCKER_COMPOSE}`) + cat /etc/`basename ${WELDER_DOCKER_COMPOSE}` + fi + + if [ ! -z ${JUPYTER_DOCKER_IMAGE} ] ; then + TOOL_SERVER_NAME=${JUPYTER_SERVER_NAME} + COMPOSE_FILES+=(-f /etc/`basename ${JUPYTER_DOCKER_COMPOSE}`) + cat /etc/`basename ${JUPYTER_DOCKER_COMPOSE}` + fi + + if [ ! -z ${RSTUDIO_DOCKER_IMAGE} ] ; then + TOOL_SERVER_NAME=${RSTUDIO_SERVER_NAME} + COMPOSE_FILES+=(-f /etc/`basename ${RSTUDIO_DOCKER_COMPOSE}`) + cat /etc/`basename ${RSTUDIO_DOCKER_COMPOSE}` + fi + + retry 5 docker-compose "${COMPOSE_FILES[@]}" config + retry 5 docker-compose "${COMPOSE_FILES[@]}" pull + retry 5 docker-compose "${COMPOSE_FILES[@]}" up -d + + # Start up crypto detector, if enabled. + # This should be started after other containers. + # Use `docker run` instead of docker-compose so we can link it to the Jupyter/RStudio container's network. + # See https://github.com/broadinstitute/terra-cryptomining-security-alerts/tree/master/v2 + if [ ! -z "$CRYPTO_DETECTOR_DOCKER_IMAGE" ] ; then + docker run --name=${CRYPTO_DETECTOR_SERVER_NAME} --rm -d \ + --net=container:${TOOL_SERVER_NAME} ${CRYPTO_DETECTOR_DOCKER_IMAGE} + fi + + STEP_TIMINGS+=($(date +%s)) + + # Jupyter-specific setup, only do if Jupyter is installed + if [ ! -z ${JUPYTER_DOCKER_IMAGE} ] ; then + log 'Installing Jupydocker kernelspecs...' + + # Install notebook.json + if [ ! -z ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} ] ; then + log 'Copy Jupyter frontend notebook config...' 
+ gsutil cp ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} /etc + JUPYTER_NOTEBOOK_FRONTEND_CONFIG=`basename ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI}` + retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "mkdir -p $JUPYTER_HOME/nbconfig" + docker cp /etc/${JUPYTER_NOTEBOOK_FRONTEND_CONFIG} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/nbconfig/ + fi + + STEP_TIMINGS+=($(date +%s)) + + # Install NbExtensions. These are user-specified Jupyter extensions. + # For instance Terra UI is passing + # { + # "nbExtensions": { + # "saturn-iframe-extension": "https://bvdp-saturn-dev.appspot.com/jupyter-iframe-extension.js" + # }, + # "labExtensions": {}, + # "serverExtensions": {}, + # "combinedExtensions": {} + # } + if [ ! -z "${JUPYTER_NB_EXTENSIONS}" ] ; then + for ext in ${JUPYTER_NB_EXTENSIONS} + do + log 'Installing Jupyter NB extension [$ext]...' + if [[ $ext == 'gs://'* ]]; then + gsutil cp $ext /etc + JUPYTER_EXTENSION_ARCHIVE=`basename $ext` + docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then + JUPYTER_EXTENSION_FILE=`basename $ext` + curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} + docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} + else + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_notebook_extension.sh $ext + fi + done + fi + + STEP_TIMINGS+=($(date +%s)) + + # Install serverExtensions if provided by the user + if [ ! 
-z "${JUPYTER_SERVER_EXTENSIONS}" ] ; then + for ext in ${JUPYTER_SERVER_EXTENSIONS} + do + log 'Installing Jupyter server extension [$ext]...' + if [[ $ext == 'gs://'* ]]; then + gsutil cp $ext /etc + JUPYTER_EXTENSION_ARCHIVE=`basename $ext` + docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + else + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_server_extension.sh $ext + fi + done + fi + + STEP_TIMINGS+=($(date +%s)) + + # Install combined extensions if provided by the user + if [ ! -z "${JUPYTER_COMBINED_EXTENSIONS}" ] ; then + for ext in ${JUPYTER_COMBINED_EXTENSIONS} + do + log 'Installing Jupyter combined extension [$ext]...' + log $ext + if [[ $ext == 'gs://'* ]]; then + gsutil cp $ext /etc + JUPYTER_EXTENSION_ARCHIVE=`basename $ext` + docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE} + else + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_combined_extension.sh $ext + fi + done + fi + + STEP_TIMINGS+=($(date +%s)) + + # If a user script was specified, copy it into the docker container and execute it. + if [ ! -z "$USER_SCRIPT_URI" ] ; then + apply_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME + fi + + # done user script + STEP_TIMINGS+=($(date +%s)) + + # If a start user script was specified, copy it into the docker container for consumption during startups. + if [ ! 
-z "$START_USER_SCRIPT_URI" ] ; then + apply_start_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME + fi + + # done start user script + STEP_TIMINGS+=($(date +%s)) + + # Install lab extensions if provided by the user + # Note: lab extensions need to installed as jupyter user, not root + if [ ! -z "${JUPYTER_LAB_EXTENSIONS}" ] ; then + for ext in ${JUPYTER_LAB_EXTENSIONS} + do + log 'Installing JupyterLab extension [$ext]...' + pwd + if [[ $ext == 'gs://'* ]]; then + gsutil cp -r $ext /etc + JUPYTER_EXTENSION_ARCHIVE=`basename $ext` + docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then + JUPYTER_EXTENSION_FILE=`basename $ext` + curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} + docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} + retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} + else + retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh $ext + fi + done + fi + + STEP_TIMINGS+=($(date +%s)) + + # See IA-1901: Jupyter UI stalls indefinitely on initial R kernel connection after cluster create/resume + # The intent of this is to "warm up" R at VM creation time to hopefully prevent issues when the Jupyter + # kernel tries to connect to it. 
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "R -e '1+1'" || true + + # jupyter_delocalize.py now assumes welder's url is `http://welder:8080`, but on dataproc, we're still using host network + # A better to do this might be to take welder host as an argument to the script + docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' /etc/jupyter/custom/jupyter_delocalize.py" + + # In new jupyter images, we should update jupyter_notebook_config.py in terra-docker. + # This is to make it so that older images will still work after we change notebooks location to home dir + docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py + + # Copy gitignore into jupyter container + docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global" + + # Install nbstripout and set gitignore in Git Config + docker exec $JUPYTER_SERVER_NAME /bin/bash -c "pip install nbstripout \ + && python -m nbstripout --install --global \ + && git config --global core.excludesfile $JUPYTER_USER_HOME/gitignore_global" + + # Install the custom jupyter extensions needed to lock notebooks into edit or safe modes (required by AOU) + docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_HOME/scripts/extension/install_jupyter_contrib_nbextensions.sh \ + && mkdir -p $JUPYTER_USER_HOME/.jupyter/custom/ \ + && cp $JUPYTER_HOME/custom/google_sign_in.js $JUPYTER_USER_HOME/.jupyter/custom/ \ + && ls -la $JUPYTER_HOME/custom/extension_entry_jupyter.js \ + && cp $JUPYTER_HOME/custom/extension_entry_jupyter.js $JUPYTER_USER_HOME/.jupyter/custom/custom.js \ + && cp $JUPYTER_HOME/custom/safe-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ + && cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ + && mkdir -p 
$JUPYTER_HOME/nbconfig" + + log 'Starting Jupyter Notebook...' + retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_SCRIPTS}/run-jupyter.sh ${NOTEBOOKS_DIR}" + + STEP_TIMINGS+=($(date +%s)) + fi + + # RStudio specific setup; only do if RStudio is installed + if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then + EXIT_CODE=0 + retry 3 docker exec ${RSTUDIO_SERVER_NAME} ${RSTUDIO_SCRIPTS}/set_up_package_dir.sh || EXIT_CODE=$? + if [ $EXIT_CODE -ne 0 ]; then + echo "RStudio user package installation directory creation failed, creating /packages directory" + docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c "mkdir -p ${RSTUDIO_USER_HOME}/packages && chmod a+rwx ${RSTUDIO_USER_HOME}/packages" + fi + + # Add the EVs specified in rstudio-docker-compose.yaml to Renviron.site + retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'echo "GOOGLE_PROJECT=$GOOGLE_PROJECT +CLUSTER_NAME=$CLUSTER_NAME +RUNTIME_NAME=$RUNTIME_NAME +OWNER_EMAIL=$OWNER_EMAIL +SHOULD_BACKGROUND_SYNC=$SHOULD_BACKGROUND_SYNC" >> /usr/local/lib/R/etc/Renviron.site' + + # Add custom_env_vars.env to Renviron.site + CUSTOM_ENV_VARS_FILE=/var/custom_env_vars.env + if [ -f "$CUSTOM_ENV_VARS_FILE" ]; then + retry 3 docker cp ${CUSTOM_ENV_VARS_FILE} ${RSTUDIO_SERVER_NAME}:/usr/local/lib/R/var/custom_env_vars.env + retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'cat /usr/local/lib/R/var/custom_env_vars.env >> /usr/local/lib/R/etc/Renviron.site' + fi + + # If a user script was specified, copy it into the docker container and execute it. + if [ ! -z "$USER_SCRIPT_URI" ] ; then + apply_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS + fi + + # If a start user script was specified, copy it into the docker container for consumption during startups. + if [ ! 
-z "$START_USER_SCRIPT_URI" ] ; then + apply_start_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS + fi + + # Start RStudio server + retry 3 docker exec -d ${RSTUDIO_SERVER_NAME} /init + fi + + # Remove any unneeded cached images to save disk space. + # Do this asynchronously so it doesn't hold up cluster creation + log 'Pruning docker images...' + docker image prune -a -f & +fi + +log 'All done!' +log "Timings: ${STEP_TIMINGS[@]}" \ No newline at end of file diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh index 3a3840cc0e..6745cf3886 100644 --- a/http/src/main/resources/init-resources/init-actions.sh +++ b/http/src/main/resources/init-resources/init-actions.sh @@ -122,6 +122,7 @@ function apply_start_user_script() { # UPDATE THIS IF YOU ADD MORE STEPS: # currently the steps are: # START init, +# .. after gcloud Ops Agent # .. after env setup # .. after copying files from google and into docker # .. after docker compose @@ -135,6 +136,20 @@ function apply_start_user_script() { # .. after jupyter notebook start # END STEP_TIMINGS=($(date +%s)) + + +## Installs Google Cloud Ops Agent that is now required for Dataproc 2.2.X ### +# See https://github.com/GoogleCloudDataproc/initialization-actions/tree/master/opsagent +# Installs the Google Cloud Ops Agent on each node in the cluster. +# It also provides an override to the built-in logging config to set empty +# receivers i.e. not collect any logs. +# If you need to collect syslogs, you can use the other script in this directory, +# opsagent.sh which uses the built-in configuration of Ops Agent. +# See https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/configuration#default. 
+# +curl -sSO https://dl.google.com/cloudagents/add-google-cloud-ops-agent-repo.sh +bash add-google-cloud-ops-agent-repo.sh --also-install + # temp workaround for https://github.com/docker/compose/issues/5930 export CLOUDSDK_PYTHON=python3 @@ -230,44 +245,25 @@ if [[ "${ROLE}" == 'Master' ]]; then touch auth_openidc.conf - ## Note that the stack driver configuration is changing in later versions of Dataproc, see https://broadworkbench.atlassian.net/browse/IA-5023 - # Add stack driver configuration for welder - tee /etc/google-fluentd/config.d/welder.conf << END - <source> - @type tail - format json - path /work/welder.log - pos_file /var/tmp/fluentd.welder.pos - read_from_head true - tag welder - </source> -END - - # Add stack driver configuration for jupyter - tee /etc/google-fluentd/config.d/jupyter.conf << END - <source> - @type tail - format none - path /work/jupyter.log - pos_file /var/tmp/fluentd.jupyter.pos - read_from_head true - tag jupyter - </source> -END - - # Add stack driver configuration for user startup and shutdown scripts - tee /etc/google-fluentd/config.d/daemon.conf << END - <source> - @type tail - format none - path /var/log/daemon.log - pos_file /var/tmp/fluentd.google.user.daemon.pos - read_from_head true - tag daemon - </source> -END - - service google-fluentd reload + # Add ops agent configuration for welder, jupyter, user startup and shutdown scripts + cat <<EOF >> /etc/google-cloud-ops-agent/config.yaml + logging: + receivers: + welder: + type: files + include_paths: [/work/welder.log] + jupyter: + type: files + include_paths: [/work/jupyter.log] + daemon: + type: files + include_paths: [/var/log/daemon.log] + service: + pipelines: + default_pipeline: + receivers: [welder, jupyter, daemon] +EOF + systemctl restart google-cloud-ops-agent # Install env var config if [ ! 
-z ${CUSTOM_ENV_VARS_CONFIG_URI} ] ; then @@ -313,6 +309,10 @@ END fi retry 5 docker-compose "${COMPOSE_FILES[@]}" config + + # restart docker + systemctl restart docker + retry 5 docker-compose "${COMPOSE_FILES[@]}" pull retry 5 docker-compose "${COMPOSE_FILES[@]}" up -d diff --git a/http/src/main/resources/init-resources/proxy-docker-compose.yaml b/http/src/main/resources/init-resources/proxy-docker-compose.yaml index 303bb21673..932c6bfbfc 100644 --- a/http/src/main/resources/init-resources/proxy-docker-compose.yaml +++ b/http/src/main/resources/init-resources/proxy-docker-compose.yaml @@ -2,8 +2,9 @@ version: '2.4' services: proxy: container_name: "${PROXY_SERVER_NAME}" - image: "${PROXY_DOCKER_IMAGE}" + image: "mirror.gcr.io/${PROXY_DOCKER_IMAGE}" network_mode: host + ipc: shareable volumes: - ${CERT_DIRECTORY}/jupyter-server.crt:/etc/ssl/certs/server.crt:ro - ${CERT_DIRECTORY}/jupyter-server.key:/etc/ssl/private/server.key:ro diff --git a/http/src/main/resources/reference.conf b/http/src/main/resources/reference.conf index b73db7ae98..814de24177 100644 --- a/http/src/main/resources/reference.conf +++ b/http/src/main/resources/reference.conf @@ -69,8 +69,11 @@ dataproc { region = "us-central1" } - # Cached dataproc image used by Terra - customDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-1-11-debian11-2025-03-18-14-36-11" + # Cached dataproc 2.2.x image used by Terra + customDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-2-52-debian12-2025-05-06-13-58-25" + # AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod + # Cached dataproc 2.1.x image used by AOU + legacyAouCustomDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-1-11-debian11-2025-05-06-16-08-29" # The ratio of memory allocated to spark. 0.8 = 80%. # Hail/Spark users generally allocate 80% of the ram to the JVM. 
@@ -111,7 +114,7 @@ dataproc { } gce { - customGceImage = "projects/broad-dsp-gcr-public/global/images/leo-gce-image-2025-04-28-18-54-55" + customGceImage = "projects/broad-dsp-gcr-public/global/images/leo-gce-image-2025-05-05-17-10-22" userDiskDeviceName = "user-disk" defaultScopes = [ "https://www.googleapis.com/auth/userinfo.email", @@ -896,7 +899,7 @@ image { welderGcrUri = "us.gcr.io/broad-dsp-gcr-public/welder-server" welderDockerHubUri = "broadinstitute/welder-server" welderHash = "0c1d0eb" - jupyterImage = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-gatk:2.3.6" + jupyterImage = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-gatk:2.3.9" proxyImage = "broadinstitute/openidc-proxy:2.3.1_2" # Note: If you update this, please also update prepare_gce_image.sh and # prepare-custom-leonardo-jupyter-dataproc-image.sh scripts. @@ -935,6 +938,9 @@ welder { # cluster scripts and config gceClusterResources { initScript = "gce-init.sh" + # AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod. 
+ # This is a placeholder to avoid making the rest of the code more complicated + legacyAOUInitScript = "gce-init.sh" cloudInit = "cloud-init.yml" startupScript = "startup.sh" shutdownScript = "shutdown.sh" @@ -951,6 +957,8 @@ gceClusterResources { clusterResources { initScript = "init-actions.sh" + # AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod + legacyAOUInitScript = "init-actions-aou-dataproc-21.sh" startupScript = "startup.sh" shutdownScript = "shutdown.sh" jupyterDockerCompose = "jupyter-docker-compose.yaml" diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala index 8955524efa..9313ad68b4 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala @@ -1,19 +1,21 @@ package org.broadinstitute.dsde.workbench.leonardo package config -case class ClusterResourcesConfig(initScript: RuntimeResource, - cloudInit: Option[RuntimeResource], - startupScript: RuntimeResource, - shutdownScript: RuntimeResource, - jupyterDockerCompose: RuntimeResource, - gpuDockerCompose: Option[RuntimeResource], // only applies to GCE runtimes - rstudioDockerCompose: RuntimeResource, - proxyDockerCompose: RuntimeResource, - welderDockerCompose: RuntimeResource, - proxySiteConf: RuntimeResource, - jupyterNotebookConfigUri: RuntimeResource, - jupyterNotebookFrontendConfigUri: RuntimeResource, - customEnvVarsConfigUri: RuntimeResource +case class ClusterResourcesConfig( + initScript: RuntimeResource, + legacyAOUInitScript: RuntimeResource, // AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod + cloudInit: Option[RuntimeResource], + startupScript: RuntimeResource, + shutdownScript: RuntimeResource, + jupyterDockerCompose: RuntimeResource, + 
gpuDockerCompose: Option[RuntimeResource], // only applies to GCE runtimes + rstudioDockerCompose: RuntimeResource, + proxyDockerCompose: RuntimeResource, + welderDockerCompose: RuntimeResource, + proxySiteConf: RuntimeResource, + jupyterNotebookConfigUri: RuntimeResource, + jupyterNotebookFrontendConfigUri: RuntimeResource, + customEnvVarsConfigUri: RuntimeResource ) object ClusterResourcesConfig { diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala index f9f62aee65..5a2fd33a6c 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala @@ -124,6 +124,8 @@ object Config { DataprocConfig( config.getStringList("defaultScopes").asScala.toSet, config.as[DataprocCustomImage]("customDataprocImage"), + // AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod + config.as[DataprocCustomImage]("legacyAouCustomDataprocImage"), config.getAs[Double]("sparkMemoryConfigRatio"), config.getAs[Double]("minimumRuntimeMemoryInGb"), config.as[RuntimeConfig.DataprocConfig]("runtimeDefaults"), @@ -199,6 +201,8 @@ object Config { config => ClusterResourcesConfig( config.as[RuntimeResource]("initScript"), + // AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod + config.as[RuntimeResource]("legacyAOUInitScript"), config.getAs[RuntimeResource]("cloudInit"), config.as[RuntimeResource]("startupScript"), config.as[RuntimeResource]("shutdownScript"), diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/DataprocConfig.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/DataprocConfig.scala index 1b7d8c20d3..8aedbb9a02 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/DataprocConfig.scala +++ 
b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/DataprocConfig.scala @@ -7,6 +7,8 @@ import org.broadinstitute.dsde.workbench.leonardo.CustomImage.DataprocCustomImag final case class DataprocConfig( defaultScopes: Set[String], customDataprocImage: DataprocCustomImage, + // AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod + legacyAouCustomDataprocImage: DataprocCustomImage, sparkMemoryConfigRatio: Option[Double], minimumRuntimeMemoryInGb: Option[Double], runtimeConfigDefaults: RuntimeConfig.DataprocConfig, diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/BucketHelper.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/BucketHelper.scala index 3e130e072b..19e28c86e1 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/BucketHelper.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/BucketHelper.scala @@ -209,6 +209,8 @@ class BucketHelper[F[_]]( .emits( List( clusterResourcesConfig.initScript, + // AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod + clusterResourcesConfig.legacyAOUInitScript, clusterResourcesConfig.jupyterNotebookFrontendConfigUri ) ) diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala index 5ed6715621..42e6cf2a05 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala @@ -184,16 +184,19 @@ class DataprocInterpreter[F[_]: Parallel]( .compile .drain - // build cluster configuration - initScriptResources = List(config.clusterResourcesConfig.initScript) - initScripts = initScriptResources.map(resource => GcsPath(initBucketName, GcsObjectName(resource.asString))) - - // If we need to 
support 2 version of dataproc custom image, we'll update this -// dataprocImage = config.dataprocConfig.customDataprocImage + imageUrls = params.runtimeImages.map(_.imageUrl) + // build cluster configuration // We need to maintain the old version of the dataproc image to uncouple the terra from the aou release - imageUrls = params.runtimeImages.map(_.imageUrl) - dataprocImage = config.dataprocConfig.customDataprocImage + // AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod + (dataprocImage, initScriptResources) = + if (imageUrls.contains("us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.13")) + (config.dataprocConfig.legacyAouCustomDataprocImage, + List(config.clusterResourcesConfig.legacyAOUInitScript) + ) + else (config.dataprocConfig.customDataprocImage, List(config.clusterResourcesConfig.initScript)) + + initScripts = initScriptResources.map(resource => GcsPath(initBucketName, GcsObjectName(resource.asString))) // If the cluster is configured with worker private access, then specify the // `leonardo-private` network tag. 
This tag will be removed from the master node @@ -204,7 +207,9 @@ class DataprocInterpreter[F[_]: Parallel]( } else { List(config.vpcConfig.networkTag.value) } - + // Dataproc 2.2.X changed the default behavior to not assign an external ip address anymore, + // but a combination of Internal IP only and Private Google Access instead, see: + // https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/network#create-a-dataproc-cluster-with-internal-IP-addresses-only gceClusterConfig = { val bldr = GceClusterConfig .newBuilder() @@ -212,6 +217,7 @@ class DataprocInterpreter[F[_]: Parallel]( .setSubnetworkUri(subnetwork.value) .setServiceAccount(params.serviceAccountInfo.value) .addAllServiceAccountScopes(params.scopes.asJava) + .setInternalIpOnly(false) bldr.build() } diff --git a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/CommonTestData.scala b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/CommonTestData.scala index 094f8300a7..cdcce4ca6a 100644 --- a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/CommonTestData.scala +++ b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/CommonTestData.scala @@ -236,6 +236,9 @@ object CommonTestData { RuntimeImage(Proxy, imageConfig.proxyImage.imageUrl, None, Instant.now.truncatedTo(ChronoUnit.MICROS)) val customDataprocImage = RuntimeImage(BootSource, "custom_dataproc", None, Instant.now.truncatedTo(ChronoUnit.MICROS)) + // AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod + val legacyAouCustomDataprocImage = + RuntimeImage(BootSource, "legacy_aou_custom_dataproc", None, Instant.now.truncatedTo(ChronoUnit.MICROS)) val cryptoDetectorImage = RuntimeImage(CryptoDetector, "crypto/crypto:0.0.1", None, Instant.now.truncatedTo(ChronoUnit.MICROS)) diff --git a/jenkins/dataproc-custom-images/create_dataproc_image.sh b/jenkins/dataproc-custom-images/create_dataproc_image.sh index 54bdb3dda7..1eb29ce746 100755 --- 
a/jenkins/dataproc-custom-images/create_dataproc_image.sh +++ b/jenkins/dataproc-custom-images/create_dataproc_image.sh @@ -27,7 +27,7 @@ TEST_BUCKET="gs://leo-dataproc-image-creation-logs" pushd $WORK_DIR DATAPROC_BASE_NAME="leo-dataproc-image" -DP_VERSION_FORMATTED="2-1-11-debian11" +DP_VERSION_FORMATTED="2-2-52-debian12" # This needs to be unique for each run IMAGE_ID=$(date +"%Y-%m-%d-%H-%M-%S") OUTPUT_IMAGE_NAME="$DATAPROC_BASE_NAME-$DP_VERSION_FORMATTED-$IMAGE_ID" @@ -36,7 +36,7 @@ gcloud config set dataproc/region us-central1 python generate_custom_image.py \ --image-name "$OUTPUT_IMAGE_NAME" \ - --dataproc-version "2.1.11-debian11" \ + --dataproc-version "2.2.52-debian12" \ --customization-script ../prepare-custom-leonardo-jupyter-dataproc-image.sh \ --zone $ZONE \ --gcs-bucket $DATAPROC_IMAGE_BUCKET \ diff --git a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh index 34c9f0171a..d66ec614e8 100755 --- a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh +++ b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh @@ -16,28 +16,33 @@ set -e -x # the image tags are set via jenkins automation # -terra_jupyter_python="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:1.1.5" -terra_jupyter_r="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-r:2.2.6" -terra_jupyter_bioconductor="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-bioconductor:2.2.6" -terra_jupyter_hail="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-hail:1.1.12" -terra_jupyter_gatk="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-gatk:2.3.8" -terra_jupyter_aou="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.14" +terra_jupyter_python="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:1.1.6" +terra_jupyter_r="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-r:2.2.7" 
+terra_jupyter_bioconductor="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-bioconductor:2.2.7" +terra_jupyter_hail="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-hail:1.1.14" +terra_jupyter_gatk="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-gatk:2.3.9" +terra_jupyter_aou="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.16" + welder_server="us.gcr.io/broad-dsp-gcr-public/welder-server:8667bfe" openidc_proxy="broadinstitute/openidc-proxy:2.3.1_2" anvil_rstudio_bioconductor="us.gcr.io/broad-dsp-gcr-public/anvil-rstudio-bioconductor:3.20.1" # Note that this is the version used currently by AOU in production, the one above can be staged for testing +# AN-503: Note that AOU 2.2.13 is using a hail version that still requires dataproc 2.1.x +# You can check which version of the AOU image is used in prod here: https://github.com/all-of-us/workbench/blob/main/api/config/config_prod.json#L15C1-L16C1 terra_jupyter_aou_old="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.13" # If you change this you must also change Leo reference.conf! 
cryptomining_detector="us.gcr.io/broad-dsp-gcr-public/cryptomining-detector:0.0.2" -# this array determines which of the above images are baked into the custom image +# This array determines which of the above images are baked into the custom dataproc 2.2.x image # the entry must match the var name above, which must correspond to a valid docker URI -docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou terra_jupyter_aou_old openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" +docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" -# NOTE - UNCOMMENT TO REGENERATE THE AOU LEGACY DATAPROC IMAGE -# You would also need to change the debian version, see https://github.com/DataBiosphere/leonardo/pull/3871 +# Comment the above and uncomment this to create the dataproc 2.1.x image +# You would also need to revert the dataproc versions in the create_dataproc_image.sh like this: +# DP_VERSION_FORMATTED="2-1-11-debian11" +# --dataproc-version "2.1.11-debian11" #docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou_old openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" # The version of python to install @@ -142,15 +147,14 @@ retry 5 add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/d retry 5 apt-get update dpkg --configure -a -# This line fails consistently, but it does not fail in a fatal way so we add `|| true` to prevent the script from halting execution -# The message that is non-fatal is `Sub-process /usr/bin/dpkg returned an error code (1).` -# NOTE: If it fails with another legitimate error, this `|| true` could mask it. 
It was used as a last resort after a lot of attempts to fix. -apt-get install -y -q docker-ce || true -log 'Installing Docker Compose...' +# start docker +systemctl start docker + +log 'Installing Docker Compose...' # Install docker-compose # https://docs.docker.com/compose/install/#install-compose -docker_compose_version_number="1.22.0" +docker_compose_version_number="v2.28.1" docker_compose_kernel_name="$(uname -s)" docker_compose_machine_hardware_name="$(uname -m)" docker_compose_binary_download_url="https://github.com/docker/compose/releases/download/${docker_compose_version_number:?}/docker-compose-${docker_compose_kernel_name:?}-${docker_compose_machine_hardware_name:?}" diff --git a/jenkins/gce-custom-images/prepare_gce_image.sh b/jenkins/gce-custom-images/prepare_gce_image.sh index 185616c8fe..b8858f689d 100755 --- a/jenkins/gce-custom-images/prepare_gce_image.sh +++ b/jenkins/gce-custom-images/prepare_gce_image.sh @@ -20,13 +20,14 @@ terra_jupyter_python="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:1.1.6" terra_jupyter_r="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-r:2.2.7" terra_jupyter_bioconductor="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-bioconductor:2.2.7" terra_jupyter_gatk="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-gatk:2.3.9" -terra_jupyter_aou="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.15" +terra_jupyter_aou="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.16" welder_server="us.gcr.io/broad-dsp-gcr-public/welder-server:8667bfe" openidc_proxy="broadinstitute/openidc-proxy:2.3.1_2" anvil_rstudio_bioconductor="us.gcr.io/broad-dsp-gcr-public/anvil-rstudio-bioconductor:3.20.1" # Note that this is the version used currently by AOU in production, the one above can be staged for testing -terra_jupyter_aou_old="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.14" +# You can check which version of the AOU image is used in prod here: https://github.com/all-of-us/workbench/blob/main/api/config/config_prod.json#L15C1-L16C1 
+terra_jupyter_aou_old="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.13" cos_gpu_installer="gcr.io/cos-cloud/cos-gpu-installer:v2.1.9" google_cloud_toolbox="us.gcr.io/cos-cloud/toolbox:v20230714"