From 65c5444cec74ded8b936a7b5cb6c26597018c2e5 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Wed, 16 Apr 2025 17:39:37 -0400 Subject: [PATCH 01/35] Generate new dataproc image with opsagent installed for logging --- .../resources/init-resources/init-actions.sh | 36 +++++++++++++++++++ .../create_dataproc_image.sh | 4 +-- ...-custom-leonardo-jupyter-dataproc-image.sh | 2 +- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh index 3a3840cc0e8..3149a37b36b 100644 --- a/http/src/main/resources/init-resources/init-actions.sh +++ b/http/src/main/resources/init-resources/init-actions.sh @@ -122,6 +122,7 @@ function apply_start_user_script() { # UPDATE THIS IF YOU ADD MORE STEPS: # currently the steps are: # START init, +# .. after gcloud Ops Agent # .. after env setup # .. after copying files from google and into docker # .. after docker compose @@ -135,6 +136,41 @@ function apply_start_user_script() { # .. after jupyter notebook start # END STEP_TIMINGS=($(date +%s)) + + +### Installs Google Cloud Ops Agent that is now required for Dataproc 2.2.X ### +# See https://github.com/GoogleCloudDataproc/initialization-actions/tree/master/opsagent +# Installs the Google Cloud Ops Agent on each node in the cluster. +# It also provides an override to the built-in logging config to set empty +# receivers i.e. not collect any logs. +# If you need to collect syslogs, you can use the other script in this directory, +# opsagent.sh which uses the built-in configuration of Ops Agent. +# See https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/configuration#default. + +# Detect dataproc image version from its various names +if (! 
test -v DATAPROC_IMAGE_VERSION) && test -v DATAPROC_VERSION; then + DATAPROC_IMAGE_VERSION="${DATAPROC_VERSION}" +fi + +if [[ $(echo "${DATAPROC_IMAGE_VERSION} < 2.2" | bc -l) == 1 ]]; then + echo "This Dataproc cluster node runs image version ${DATAPROC_IMAGE_VERSION} with pre-installed legacy monitoring agent. Skipping Ops Agent installation." + exit 0 +fi + +curl -sSO https://dl.google.com/cloudagents/add-google-cloud-ops-agent-repo.sh +bash add-google-cloud-ops-agent-repo.sh --also-install + +cat <<EOF >> /etc/google-cloud-ops-agent/config.yaml +logging: + service: + pipelines: + default_pipeline: + receivers: [] +EOF + +systemctl restart google-cloud-ops-agent +### + # temp workaround for https://github.com/docker/compose/issues/5930 export CLOUDSDK_PYTHON=python3 diff --git a/jenkins/dataproc-custom-images/create_dataproc_image.sh b/jenkins/dataproc-custom-images/create_dataproc_image.sh index 54bdb3dda7b..1eb29ce7467 100755 --- a/jenkins/dataproc-custom-images/create_dataproc_image.sh +++ b/jenkins/dataproc-custom-images/create_dataproc_image.sh @@ -27,7 +27,7 @@ TEST_BUCKET="gs://leo-dataproc-image-creation-logs" pushd $WORK_DIR DATAPROC_BASE_NAME="leo-dataproc-image" -DP_VERSION_FORMATTED="2-1-11-debian11" +DP_VERSION_FORMATTED="2-2-52-debian12" # This needs to be unique for each run IMAGE_ID=$(date +"%Y-%m-%d-%H-%M-%S") OUTPUT_IMAGE_NAME="$DATAPROC_BASE_NAME-$DP_VERSION_FORMATTED-$IMAGE_ID" @@ -36,7 +36,7 @@ gcloud config set dataproc/region us-central1 python generate_custom_image.py \ --image-name "$OUTPUT_IMAGE_NAME" \ - --dataproc-version "2.1.11-debian11" \ + --dataproc-version "2.2.52-debian12" \ --customization-script ../prepare-custom-leonardo-jupyter-dataproc-image.sh \ --zone $ZONE \ --gcs-bucket $DATAPROC_IMAGE_BUCKET \ diff --git a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh index 34c9f0171a0..48f140e16c9 100755 --- 
a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh +++ b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh @@ -19,7 +19,7 @@ set -e -x terra_jupyter_python="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:1.1.5" terra_jupyter_r="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-r:2.2.6" terra_jupyter_bioconductor="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-bioconductor:2.2.6" -terra_jupyter_hail="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-hail:1.1.12" +terra_jupyter_hail="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-hail:1.1.11" terra_jupyter_gatk="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-gatk:2.3.8" terra_jupyter_aou="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.14" welder_server="us.gcr.io/broad-dsp-gcr-public/welder-server:8667bfe" From dea02ec9a5ce37e9557d5b3a03e448dab2819116 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Wed, 23 Apr 2025 13:40:58 -0400 Subject: [PATCH 02/35] update docker compose --- .../prepare-custom-leonardo-jupyter-dataproc-image.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh index 48f140e16c9..44a095e7c53 100755 --- a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh +++ b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh @@ -145,12 +145,15 @@ dpkg --configure -a # This line fails consistently, but it does not fail in a fatal way so we add `|| true` to prevent the script from halting execution # The message that is non-fatal is `Sub-process /usr/bin/dpkg returned an error code (1).` # NOTE: If it fails with another legitimate error, this `|| true` could mask it. It was used as a last resort after a lot of attempts to fix. 
-apt-get install -y -q docker-ce || true +# apt-get install -y -q docker-ce || true log 'Installing Docker Compose...' +# start docker +systemctl start docker + # Install docker-compose # https://docs.docker.com/compose/install/#install-compose -docker_compose_version_number="1.22.0" +docker_compose_version_number="2.28.1" docker_compose_kernel_name="$(uname -s)" docker_compose_machine_hardware_name="$(uname -m)" docker_compose_binary_download_url="https://github.com/docker/compose/releases/download/${docker_compose_version_number:?}/docker-compose-${docker_compose_kernel_name:?}-${docker_compose_machine_hardware_name:?}" From 7e533097043ad6a59b33943ef4fb3787bd5ead4c Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Wed, 23 Apr 2025 15:22:48 -0400 Subject: [PATCH 03/35] point to new dataproc 2.2 image --- http/src/main/resources/reference.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/http/src/main/resources/reference.conf b/http/src/main/resources/reference.conf index 4335ca47730..302ae4d8cf1 100644 --- a/http/src/main/resources/reference.conf +++ b/http/src/main/resources/reference.conf @@ -70,7 +70,7 @@ dataproc { } # Cached dataproc image used by Terra - customDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-1-11-debian11-2025-03-18-14-36-11" + customDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-2-52-debian12-2025-04-23-17-54-05" # The ratio of memory allocated to spark. 0.8 = 80%. # Hail/Spark users generally allocate 80% of the ram to the JVM. 
From 8e07d19cef5414ff56b7d6e21a7a51f3a183815a Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Wed, 23 Apr 2025 16:53:42 -0400 Subject: [PATCH 04/35] set up logging receivers --- .../resources/init-resources/init-actions.sh | 113 +++++++++++------- 1 file changed, 67 insertions(+), 46 deletions(-) diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh index 3149a37b36b..00e2b12c5dc 100644 --- a/http/src/main/resources/init-resources/init-actions.sh +++ b/http/src/main/resources/init-resources/init-actions.sh @@ -159,16 +159,16 @@ fi curl -sSO https://dl.google.com/cloudagents/add-google-cloud-ops-agent-repo.sh bash add-google-cloud-ops-agent-repo.sh --also-install - -cat <<EOF >> /etc/google-cloud-ops-agent/config.yaml -logging: - service: - pipelines: - default_pipeline: - receivers: [] -EOF - -systemctl restart google-cloud-ops-agent +# +#cat <<EOF >> /etc/google-cloud-ops-agent/config.yaml +#logging: +# service: +# pipelines: +# default_pipeline: +# receivers: [] +#EOF +# +#systemctl restart google-cloud-ops-agent ### # temp workaround for https://github.com/docker/compose/issues/5930 @@ -266,44 +266,65 @@ if [[ "${ROLE}" == 'Master' ]]; then touch auth_openidc.conf + # Add ops agent configuration for welder, jupyter, user startup and shutdown scripts + cat <<EOF >> /etc/google-cloud-ops-agent/config.yaml + logging: + receivers: + welder + type: files + include_paths: [/work/welder.log] + jupyter + type: files + include_paths: [/work/jupyter.log] + daemon + type: files + include_paths: [/var/log/daemon.log] + service: + pipelines: + default_pipeline: + receivers: [welder, jupyter, daemon] +EOF + + systemctl restart google-cloud-ops-agent + ## Note that the stack driver configuration is changing in later versions of Dataproc, see https://broadworkbench.atlassian.net/browse/IA-5023 # Add stack driver configuration for welder - tee /etc/google-fluentd/config.d/welder.conf << END -<source> - @type tail - format json - path 
/work/welder.log - pos_file /var/tmp/fluentd.welder.pos - read_from_head true - tag welder -</source> -END - - # Add stack driver configuration for jupyter - tee /etc/google-fluentd/config.d/jupyter.conf << END -<source> - @type tail - format none - path /work/jupyter.log - pos_file /var/tmp/fluentd.jupyter.pos - read_from_head true - tag jupyter -</source> -END - - # Add stack driver configuration for user startup and shutdown scripts - tee /etc/google-fluentd/config.d/daemon.conf << END -<source> - @type tail - format none - path /var/log/daemon.log - pos_file /var/tmp/fluentd.google.user.daemon.pos - read_from_head true - tag daemon -</source> -END - - service google-fluentd reload +# tee /etc/google-fluentd/config.d/welder.conf << END +#<source> +# @type tail +# format json +# path /work/welder.log +# pos_file /var/tmp/fluentd.welder.pos +# read_from_head true +# tag welder +#</source> +#END +# +# # Add stack driver configuration for jupyter +# tee /etc/google-fluentd/config.d/jupyter.conf << END +#<source> +# @type tail +# format none +# path /work/jupyter.log +# pos_file /var/tmp/fluentd.jupyter.pos +# read_from_head true +# tag jupyter +#</source> +#END +# +# # Add stack driver configuration for user startup and shutdown scripts +# tee /etc/google-fluentd/config.d/daemon.conf << END +#<source> +# @type tail +# format none +# path /var/log/daemon.log +# pos_file /var/tmp/fluentd.google.user.daemon.pos +# read_from_head true +# tag daemon +#</source> +#END +# +# service google-fluentd reload # Install env var config if [ ! 
-z ${CUSTOM_ENV_VARS_CONFIG_URI} ] ; then From 5a98acdd28ec92d91ec9345df3b2d1a8390fe6d7 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Thu, 24 Apr 2025 09:22:27 -0400 Subject: [PATCH 05/35] empty ops agent config file for debugging purposes --- .../resources/init-resources/init-actions.sh | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh index 00e2b12c5dc..1bb62d02561 100644 --- a/http/src/main/resources/init-resources/init-actions.sh +++ b/http/src/main/resources/init-resources/init-actions.sh @@ -267,23 +267,30 @@ if [[ "${ROLE}" == 'Master' ]]; then # Add ops agent configuration for welder, jupyter, user startup and shutdown scripts +# cat <<EOF >> /etc/google-cloud-ops-agent/config.yaml +# logging: +# receivers: +# welder +# type: files +# include_paths: [/work/welder.log] +# jupyter +# type: files +# include_paths: [/work/jupyter.log] +# daemon +# type: files +# include_paths: [/var/log/daemon.log] +# service: +# pipelines: +# default_pipeline: +# receivers: [welder, jupyter, daemon] +#EOF cat <<EOF >> /etc/google-cloud-ops-agent/config.yaml logging: - receivers: - welder - type: files - include_paths: [/work/welder.log] - jupyter - type: files - include_paths: [/work/jupyter.log] - daemon - type: files - include_paths: [/var/log/daemon.log] service: pipelines: default_pipeline: - receivers: [welder, jupyter, daemon] -EOF + receivers: [] + EOF systemctl restart google-cloud-ops-agent From 5873b016c4ffb9537a41fd6d56d04eed2230afdc Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Thu, 24 Apr 2025 09:56:05 -0400 Subject: [PATCH 06/35] arg typo! 
--- http/src/main/resources/init-resources/init-actions.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh index 1bb62d02561..06b7c3022ba 100644 --- a/http/src/main/resources/init-resources/init-actions.sh +++ b/http/src/main/resources/init-resources/init-actions.sh @@ -290,7 +290,7 @@ if [[ "${ROLE}" == 'Master' ]]; then pipelines: default_pipeline: receivers: [] - EOF +EOF systemctl restart google-cloud-ops-agent From 764709f409652bb1a8cda425467cb51436b07b55 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Thu, 24 Apr 2025 10:57:34 -0400 Subject: [PATCH 07/35] fix docker compose install and ops agent config format --- .../resources/init-resources/init-actions.sh | 30 +++++++------------ ...-custom-leonardo-jupyter-dataproc-image.sh | 2 +- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh index 06b7c3022ba..456a287a9fd 100644 --- a/http/src/main/resources/init-resources/init-actions.sh +++ b/http/src/main/resources/init-resources/init-actions.sh @@ -267,31 +267,23 @@ if [[ "${ROLE}" == 'Master' ]]; then # Add ops agent configuration for welder, jupyter, user startup and shutdown scripts -# cat <<EOF >> /etc/google-cloud-ops-agent/config.yaml -# logging: -# receivers: -# welder -# type: files -# include_paths: [/work/welder.log] -# jupyter -# type: files -# include_paths: [/work/jupyter.log] -# daemon -# type: files -# include_paths: [/var/log/daemon.log] -# service: -# pipelines: -# default_pipeline: -# receivers: [welder, jupyter, daemon] -#EOF cat <<EOF >> /etc/google-cloud-ops-agent/config.yaml logging: + receivers: + welder: + type: files + include_paths: [/work/welder.log] + jupyter: + type: files + include_paths: [/work/jupyter.log] + daemon: + type: files + include_paths: [/var/log/daemon.log] service: pipelines: 
default_pipeline: - receivers: [] + receivers: [welder, jupyter, daemon] EOF - systemctl restart google-cloud-ops-agent ## Note that the stack driver configuration is changing in later versions of Dataproc, see https://broadworkbench.atlassian.net/browse/IA-5023 diff --git a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh index 44a095e7c53..5ed2394fc78 100755 --- a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh +++ b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh @@ -153,7 +153,7 @@ systemctl start docker # Install docker-compose # https://docs.docker.com/compose/install/#install-compose -docker_compose_version_number="2.28.1" +docker_compose_version_number="v2.28.1" docker_compose_kernel_name="$(uname -s)" docker_compose_machine_hardware_name="$(uname -m)" docker_compose_binary_download_url="https://github.com/docker/compose/releases/download/${docker_compose_version_number:?}/docker-compose-${docker_compose_kernel_name:?}-${docker_compose_machine_hardware_name:?}" From de472f1c4ab966dad74975bd3549fe7bcd7f27e6 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Thu, 24 Apr 2025 14:27:01 -0400 Subject: [PATCH 08/35] point to new dataproc with docker compose installed --- http/src/main/resources/reference.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/http/src/main/resources/reference.conf b/http/src/main/resources/reference.conf index 302ae4d8cf1..13c5735378e 100644 --- a/http/src/main/resources/reference.conf +++ b/http/src/main/resources/reference.conf @@ -70,7 +70,7 @@ dataproc { } # Cached dataproc image used by Terra - customDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-2-52-debian12-2025-04-23-17-54-05" + customDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-2-52-debian12-2025-04-24-14-59-45" 
# The ratio of memory allocated to spark. 0.8 = 80%. # Hail/Spark users generally allocate 80% of the ram to the JVM. From df20494a4ea7d66a99c4e85e632a859b628a77ca Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Thu, 24 Apr 2025 16:52:04 -0400 Subject: [PATCH 09/35] try to docker restart to get around proxy issues --- http/src/main/resources/init-resources/init-actions.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh index 456a287a9fd..c4dc172b92a 100644 --- a/http/src/main/resources/init-resources/init-actions.sh +++ b/http/src/main/resources/init-resources/init-actions.sh @@ -368,6 +368,9 @@ EOF cat /etc/`basename ${RSTUDIO_DOCKER_COMPOSE}` fi + # restart docker + systemctl restart docker + retry 5 docker-compose "${COMPOSE_FILES[@]}" config retry 5 docker-compose "${COMPOSE_FILES[@]}" pull retry 5 docker-compose "${COMPOSE_FILES[@]}" up -d From cda8ee66fccb555fafee9e9d95a59d0c0130b619 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Thu, 24 Apr 2025 17:30:01 -0400 Subject: [PATCH 10/35] try to restart docker just before pulling --- http/src/main/resources/init-resources/init-actions.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh index c4dc172b92a..90e058f8316 100644 --- a/http/src/main/resources/init-resources/init-actions.sh +++ b/http/src/main/resources/init-resources/init-actions.sh @@ -368,10 +368,11 @@ EOF cat /etc/`basename ${RSTUDIO_DOCKER_COMPOSE}` fi + retry 5 docker-compose "${COMPOSE_FILES[@]}" config + # restart docker systemctl restart docker - retry 5 docker-compose "${COMPOSE_FILES[@]}" config retry 5 docker-compose "${COMPOSE_FILES[@]}" pull retry 5 docker-compose "${COMPOSE_FILES[@]}" up -d From f44e998807d5ffaf007962a035bc10b4655e5c4c Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Fri, 25 Apr 2025 
14:44:51 -0400 Subject: [PATCH 11/35] try gcr mirror on proxy image --- .../src/main/resources/init-resources/proxy-docker-compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/http/src/main/resources/init-resources/proxy-docker-compose.yaml b/http/src/main/resources/init-resources/proxy-docker-compose.yaml index 303bb216739..902a8613980 100644 --- a/http/src/main/resources/init-resources/proxy-docker-compose.yaml +++ b/http/src/main/resources/init-resources/proxy-docker-compose.yaml @@ -2,7 +2,7 @@ version: '2.4' services: proxy: container_name: "${PROXY_SERVER_NAME}" - image: "${PROXY_DOCKER_IMAGE}" + image: "mirror.gcr.io/${PROXY_DOCKER_IMAGE}" network_mode: host volumes: - ${CERT_DIRECTORY}/jupyter-server.crt:/etc/ssl/certs/server.crt:ro From 5d8581edce5886d8260e1d15c09383949bd5f4ba Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Tue, 29 Apr 2025 11:35:56 -0400 Subject: [PATCH 12/35] comment out part of init action that do not work so I can ssh into the VM and investigate --- .../resources/init-resources/init-actions.sh | 346 +++++++++--------- 1 file changed, 173 insertions(+), 173 deletions(-) diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh index 90e058f8316..295ba31f375 100644 --- a/http/src/main/resources/init-resources/init-actions.sh +++ b/http/src/main/resources/init-resources/init-actions.sh @@ -412,179 +412,179 @@ EOF # "serverExtensions": {}, # "combinedExtensions": {} # } - if [ ! -z "${JUPYTER_NB_EXTENSIONS}" ] ; then - for ext in ${JUPYTER_NB_EXTENSIONS} - do - log 'Installing Jupyter NB extension [$ext]...' 
- if [[ $ext == 'gs://'* ]]; then - gsutil cp $ext /etc - JUPYTER_EXTENSION_ARCHIVE=`basename $ext` - docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} - retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} - elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then - JUPYTER_EXTENSION_FILE=`basename $ext` - curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} - docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} - retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} - else - retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_notebook_extension.sh $ext - fi - done - fi - - STEP_TIMINGS+=($(date +%s)) - - # Install serverExtensions if provided by the user - if [ ! -z "${JUPYTER_SERVER_EXTENSIONS}" ] ; then - for ext in ${JUPYTER_SERVER_EXTENSIONS} - do - log 'Installing Jupyter server extension [$ext]...' - if [[ $ext == 'gs://'* ]]; then - gsutil cp $ext /etc - JUPYTER_EXTENSION_ARCHIVE=`basename $ext` - docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} - retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} - else - retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_server_extension.sh $ext - fi - done - fi - - STEP_TIMINGS+=($(date +%s)) - - # Install combined extensions if provided by the user - if [ ! 
-z "${JUPYTER_COMBINED_EXTENSIONS}" ] ; then - for ext in ${JUPYTER_COMBINED_EXTENSIONS} - do - log 'Installing Jupyter combined extension [$ext]...' - log $ext - if [[ $ext == 'gs://'* ]]; then - gsutil cp $ext /etc - JUPYTER_EXTENSION_ARCHIVE=`basename $ext` - docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} - retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE} - else - retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_combined_extension.sh $ext - fi - done - fi - - STEP_TIMINGS+=($(date +%s)) - - # If a user script was specified, copy it into the docker container and execute it. - if [ ! -z "$USER_SCRIPT_URI" ] ; then - apply_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME - fi - - # done user script - STEP_TIMINGS+=($(date +%s)) - - # If a start user script was specified, copy it into the docker container for consumption during startups. - if [ ! -z "$START_USER_SCRIPT_URI" ] ; then - apply_start_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME - fi - - # done start user script - STEP_TIMINGS+=($(date +%s)) - - # Install lab extensions if provided by the user - # Note: lab extensions need to installed as jupyter user, not root - if [ ! -z "${JUPYTER_LAB_EXTENSIONS}" ] ; then - for ext in ${JUPYTER_LAB_EXTENSIONS} - do - log 'Installing JupyterLab extension [$ext]...' 
- pwd - if [[ $ext == 'gs://'* ]]; then - gsutil cp -r $ext /etc - JUPYTER_EXTENSION_ARCHIVE=`basename $ext` - docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} - retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} - elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then - JUPYTER_EXTENSION_FILE=`basename $ext` - curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} - docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} - retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} - else - retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh $ext - fi - done - fi - - STEP_TIMINGS+=($(date +%s)) - - # See IA-1901: Jupyter UI stalls indefinitely on initial R kernel connection after cluster create/resume - # The intent of this is to "warm up" R at VM creation time to hopefully prevent issues when the Jupyter - # kernel tries to connect to it. - docker exec $JUPYTER_SERVER_NAME /bin/bash -c "R -e '1+1'" || true - - # jupyter_delocalize.py now assumes welder's url is `http://welder:8080`, but on dataproc, we're still using host network - # A better to do this might be to take welder host as an argument to the script - docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' /etc/jupyter/custom/jupyter_delocalize.py" - - # In new jupyter images, we should update jupyter_notebook_config.py in terra-docker. 
- # This is to make it so that older images will still work after we change notebooks location to home dir - docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py - - # Copy gitignore into jupyter container - docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global" - - # Install nbstripout and set gitignore in Git Config - docker exec $JUPYTER_SERVER_NAME /bin/bash -c "pip install nbstripout \ - && python -m nbstripout --install --global \ - && git config --global core.excludesfile $JUPYTER_USER_HOME/gitignore_global" - - # Install the custom jupyter extensions needed to lock notebooks into edit or safe modes (required by AOU) - docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_HOME/scripts/extension/install_jupyter_contrib_nbextensions.sh \ - && mkdir -p $JUPYTER_USER_HOME/.jupyter/custom/ \ - && cp $JUPYTER_HOME/custom/google_sign_in.js $JUPYTER_USER_HOME/.jupyter/custom/ \ - && ls -la $JUPYTER_HOME/custom/extension_entry_jupyter.js \ - && cp $JUPYTER_HOME/custom/extension_entry_jupyter.js $JUPYTER_USER_HOME/.jupyter/custom/custom.js \ - && cp $JUPYTER_HOME/custom/safe-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ - && cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ - && mkdir -p $JUPYTER_HOME/nbconfig" - - log 'Starting Jupyter Notebook...' - retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_SCRIPTS}/run-jupyter.sh ${NOTEBOOKS_DIR}" - - STEP_TIMINGS+=($(date +%s)) - fi - - # RStudio specific setup; only do if RStudio is installed - if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then - EXIT_CODE=0 - retry 3 docker exec ${RSTUDIO_SERVER_NAME} ${RSTUDIO_SCRIPTS}/set_up_package_dir.sh || EXIT_CODE=$? 
- if [ $EXIT_CODE -ne 0 ]; then - echo "RStudio user package installation directory creation failed, creating /packages directory" - docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c "mkdir -p ${RSTUDIO_USER_HOME}/packages && chmod a+rwx ${RSTUDIO_USER_HOME}/packages" - fi - - # Add the EVs specified in rstudio-docker-compose.yaml to Renviron.site - retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'echo "GOOGLE_PROJECT=$GOOGLE_PROJECT -CLUSTER_NAME=$CLUSTER_NAME -RUNTIME_NAME=$RUNTIME_NAME -OWNER_EMAIL=$OWNER_EMAIL -SHOULD_BACKGROUND_SYNC=$SHOULD_BACKGROUND_SYNC" >> /usr/local/lib/R/etc/Renviron.site' - - # Add custom_env_vars.env to Renviron.site - CUSTOM_ENV_VARS_FILE=/var/custom_env_vars.env - if [ -f "$CUSTOM_ENV_VARS_FILE" ]; then - retry 3 docker cp ${CUSTOM_ENV_VARS_FILE} ${RSTUDIO_SERVER_NAME}:/usr/local/lib/R/var/custom_env_vars.env - retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'cat /usr/local/lib/R/var/custom_env_vars.env >> /usr/local/lib/R/etc/Renviron.site' - fi - - # If a user script was specified, copy it into the docker container and execute it. - if [ ! -z "$USER_SCRIPT_URI" ] ; then - apply_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS - fi - - # If a start user script was specified, copy it into the docker container for consumption during startups. - if [ ! -z "$START_USER_SCRIPT_URI" ] ; then - apply_start_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS - fi - - # Start RStudio server - retry 3 docker exec -d ${RSTUDIO_SERVER_NAME} /init - fi +# if [ ! -z "${JUPYTER_NB_EXTENSIONS}" ] ; then +# for ext in ${JUPYTER_NB_EXTENSIONS} +# do +# log 'Installing Jupyter NB extension [$ext]...' 
+# if [[ $ext == 'gs://'* ]]; then +# gsutil cp $ext /etc +# JUPYTER_EXTENSION_ARCHIVE=`basename $ext` +# docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} +# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} +# elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then +# JUPYTER_EXTENSION_FILE=`basename $ext` +# curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} +# docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} +# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} +# else +# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_notebook_extension.sh $ext +# fi +# done +# fi +# +# STEP_TIMINGS+=($(date +%s)) +# +# # Install serverExtensions if provided by the user +# if [ ! -z "${JUPYTER_SERVER_EXTENSIONS}" ] ; then +# for ext in ${JUPYTER_SERVER_EXTENSIONS} +# do +# log 'Installing Jupyter server extension [$ext]...' +# if [[ $ext == 'gs://'* ]]; then +# gsutil cp $ext /etc +# JUPYTER_EXTENSION_ARCHIVE=`basename $ext` +# docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} +# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} +# else +# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_server_extension.sh $ext +# fi +# done +# fi +# +# STEP_TIMINGS+=($(date +%s)) +# +# # Install combined extensions if provided by the user +# if [ ! 
-z "${JUPYTER_COMBINED_EXTENSIONS}" ] ; then +# for ext in ${JUPYTER_COMBINED_EXTENSIONS} +# do +# log 'Installing Jupyter combined extension [$ext]...' +# log $ext +# if [[ $ext == 'gs://'* ]]; then +# gsutil cp $ext /etc +# JUPYTER_EXTENSION_ARCHIVE=`basename $ext` +# docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} +# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE} +# else +# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_combined_extension.sh $ext +# fi +# done +# fi +# +# STEP_TIMINGS+=($(date +%s)) +# +# # If a user script was specified, copy it into the docker container and execute it. +# if [ ! -z "$USER_SCRIPT_URI" ] ; then +# apply_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME +# fi +# +# # done user script +# STEP_TIMINGS+=($(date +%s)) +# +# # If a start user script was specified, copy it into the docker container for consumption during startups. +# if [ ! -z "$START_USER_SCRIPT_URI" ] ; then +# apply_start_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME +# fi +# +# # done start user script +# STEP_TIMINGS+=($(date +%s)) +# +# # Install lab extensions if provided by the user +# # Note: lab extensions need to installed as jupyter user, not root +# if [ ! -z "${JUPYTER_LAB_EXTENSIONS}" ] ; then +# for ext in ${JUPYTER_LAB_EXTENSIONS} +# do +# log 'Installing JupyterLab extension [$ext]...' 
+# pwd +# if [[ $ext == 'gs://'* ]]; then +# gsutil cp -r $ext /etc +# JUPYTER_EXTENSION_ARCHIVE=`basename $ext` +# docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} +# retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} +# elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then +# JUPYTER_EXTENSION_FILE=`basename $ext` +# curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} +# docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} +# retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} +# else +# retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh $ext +# fi +# done +# fi +# +# STEP_TIMINGS+=($(date +%s)) +# +# # See IA-1901: Jupyter UI stalls indefinitely on initial R kernel connection after cluster create/resume +# # The intent of this is to "warm up" R at VM creation time to hopefully prevent issues when the Jupyter +# # kernel tries to connect to it. +# docker exec $JUPYTER_SERVER_NAME /bin/bash -c "R -e '1+1'" || true +# +# # jupyter_delocalize.py now assumes welder's url is `http://welder:8080`, but on dataproc, we're still using host network +# # A better to do this might be to take welder host as an argument to the script +# docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' /etc/jupyter/custom/jupyter_delocalize.py" +# +# # In new jupyter images, we should update jupyter_notebook_config.py in terra-docker. 
+# # This is to make it so that older images will still work after we change notebooks location to home dir +# docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py +# +# # Copy gitignore into jupyter container +# docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global" +# +# # Install nbstripout and set gitignore in Git Config +# docker exec $JUPYTER_SERVER_NAME /bin/bash -c "pip install nbstripout \ +# && python -m nbstripout --install --global \ +# && git config --global core.excludesfile $JUPYTER_USER_HOME/gitignore_global" +# +# # Install the custom jupyter extensions needed to lock notebooks into edit or safe modes (required by AOU) +# docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_HOME/scripts/extension/install_jupyter_contrib_nbextensions.sh \ +# && mkdir -p $JUPYTER_USER_HOME/.jupyter/custom/ \ +# && cp $JUPYTER_HOME/custom/google_sign_in.js $JUPYTER_USER_HOME/.jupyter/custom/ \ +# && ls -la $JUPYTER_HOME/custom/extension_entry_jupyter.js \ +# && cp $JUPYTER_HOME/custom/extension_entry_jupyter.js $JUPYTER_USER_HOME/.jupyter/custom/custom.js \ +# && cp $JUPYTER_HOME/custom/safe-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ +# && cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ +# && mkdir -p $JUPYTER_HOME/nbconfig" +# +# log 'Starting Jupyter Notebook...' +# retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_SCRIPTS}/run-jupyter.sh ${NOTEBOOKS_DIR}" +# +# STEP_TIMINGS+=($(date +%s)) +# fi +# +# # RStudio specific setup; only do if RStudio is installed +# if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then +# EXIT_CODE=0 +# retry 3 docker exec ${RSTUDIO_SERVER_NAME} ${RSTUDIO_SCRIPTS}/set_up_package_dir.sh || EXIT_CODE=$? 
+# if [ $EXIT_CODE -ne 0 ]; then +# echo "RStudio user package installation directory creation failed, creating /packages directory" +# docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c "mkdir -p ${RSTUDIO_USER_HOME}/packages && chmod a+rwx ${RSTUDIO_USER_HOME}/packages" +# fi +# +# # Add the EVs specified in rstudio-docker-compose.yaml to Renviron.site +# retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'echo "GOOGLE_PROJECT=$GOOGLE_PROJECT +#CLUSTER_NAME=$CLUSTER_NAME +#RUNTIME_NAME=$RUNTIME_NAME +#OWNER_EMAIL=$OWNER_EMAIL +#SHOULD_BACKGROUND_SYNC=$SHOULD_BACKGROUND_SYNC" >> /usr/local/lib/R/etc/Renviron.site' +# +# # Add custom_env_vars.env to Renviron.site +# CUSTOM_ENV_VARS_FILE=/var/custom_env_vars.env +# if [ -f "$CUSTOM_ENV_VARS_FILE" ]; then +# retry 3 docker cp ${CUSTOM_ENV_VARS_FILE} ${RSTUDIO_SERVER_NAME}:/usr/local/lib/R/var/custom_env_vars.env +# retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'cat /usr/local/lib/R/var/custom_env_vars.env >> /usr/local/lib/R/etc/Renviron.site' +# fi +# +# # If a user script was specified, copy it into the docker container and execute it. +# if [ ! -z "$USER_SCRIPT_URI" ] ; then +# apply_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS +# fi +# +# # If a start user script was specified, copy it into the docker container for consumption during startups. +# if [ ! -z "$START_USER_SCRIPT_URI" ] ; then +# apply_start_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS +# fi +# +# # Start RStudio server +# retry 3 docker exec -d ${RSTUDIO_SERVER_NAME} /init +# fi # Remove any unneeded cached images to save disk space. 
# Do this asynchronously so it doesn't hold up cluster creation From 8882b07dea6d43dc619b368164cec74750cce19d Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Tue, 29 Apr 2025 13:23:45 -0400 Subject: [PATCH 13/35] fix typo --- http/src/main/resources/init-resources/init-actions.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh index 295ba31f375..6fda8bd17b5 100644 --- a/http/src/main/resources/init-resources/init-actions.sh +++ b/http/src/main/resources/init-resources/init-actions.sh @@ -584,7 +584,7 @@ EOF # # # Start RStudio server # retry 3 docker exec -d ${RSTUDIO_SERVER_NAME} /init -# fi + fi # Remove any unneeded cached images to save disk space. # Do this asynchronously so it doesn't hold up cluster creation From 63b6b78dcb258c6fc7c265474cfacf0f371ac5d7 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Tue, 29 Apr 2025 16:58:02 -0400 Subject: [PATCH 14/35] bypass nb extension issue for now --- .../resources/init-resources/init-actions.sh | 346 +++++++++--------- 1 file changed, 173 insertions(+), 173 deletions(-) diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh index 6fda8bd17b5..b791c3da3bc 100644 --- a/http/src/main/resources/init-resources/init-actions.sh +++ b/http/src/main/resources/init-resources/init-actions.sh @@ -232,7 +232,7 @@ if [[ "${ROLE}" == 'Master' ]]; then WELDER_DOCKER_COMPOSE=$(welderDockerCompose) PROXY_SITE_CONF=$(proxySiteConf) JUPYTER_SERVER_EXTENSIONS=$(jupyterServerExtensions) - JUPYTER_NB_EXTENSIONS=$(jupyterNbExtensions) + JUPYTER_NB_EXTENSIONS=https://app.terra.bio/jupyter-iframe-extension.js #$(jupyterNbExtensions) JUPYTER_COMBINED_EXTENSIONS=$(jupyterCombinedExtensions) JUPYTER_LAB_EXTENSIONS=$(jupyterLabExtensions) USER_SCRIPT_URI=$(userScriptUri) @@ -412,178 +412,178 @@ EOF # "serverExtensions": {}, # "combinedExtensions": {} # } 
-# if [ ! -z "${JUPYTER_NB_EXTENSIONS}" ] ; then -# for ext in ${JUPYTER_NB_EXTENSIONS} -# do -# log 'Installing Jupyter NB extension [$ext]...' -# if [[ $ext == 'gs://'* ]]; then -# gsutil cp $ext /etc -# JUPYTER_EXTENSION_ARCHIVE=`basename $ext` -# docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} -# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} -# elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then -# JUPYTER_EXTENSION_FILE=`basename $ext` -# curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} -# docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} -# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} -# else -# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_notebook_extension.sh $ext -# fi -# done -# fi -# -# STEP_TIMINGS+=($(date +%s)) -# -# # Install serverExtensions if provided by the user -# if [ ! -z "${JUPYTER_SERVER_EXTENSIONS}" ] ; then -# for ext in ${JUPYTER_SERVER_EXTENSIONS} -# do -# log 'Installing Jupyter server extension [$ext]...' 
-# if [[ $ext == 'gs://'* ]]; then -# gsutil cp $ext /etc -# JUPYTER_EXTENSION_ARCHIVE=`basename $ext` -# docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} -# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} -# else -# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_server_extension.sh $ext -# fi -# done -# fi -# -# STEP_TIMINGS+=($(date +%s)) -# -# # Install combined extensions if provided by the user -# if [ ! -z "${JUPYTER_COMBINED_EXTENSIONS}" ] ; then -# for ext in ${JUPYTER_COMBINED_EXTENSIONS} -# do -# log 'Installing Jupyter combined extension [$ext]...' -# log $ext -# if [[ $ext == 'gs://'* ]]; then -# gsutil cp $ext /etc -# JUPYTER_EXTENSION_ARCHIVE=`basename $ext` -# docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} -# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE} -# else -# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_combined_extension.sh $ext -# fi -# done -# fi -# -# STEP_TIMINGS+=($(date +%s)) -# -# # If a user script was specified, copy it into the docker container and execute it. -# if [ ! -z "$USER_SCRIPT_URI" ] ; then -# apply_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME -# fi -# -# # done user script -# STEP_TIMINGS+=($(date +%s)) -# -# # If a start user script was specified, copy it into the docker container for consumption during startups. -# if [ ! 
-z "$START_USER_SCRIPT_URI" ] ; then -# apply_start_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME -# fi -# -# # done start user script -# STEP_TIMINGS+=($(date +%s)) -# -# # Install lab extensions if provided by the user -# # Note: lab extensions need to installed as jupyter user, not root -# if [ ! -z "${JUPYTER_LAB_EXTENSIONS}" ] ; then -# for ext in ${JUPYTER_LAB_EXTENSIONS} -# do -# log 'Installing JupyterLab extension [$ext]...' -# pwd -# if [[ $ext == 'gs://'* ]]; then -# gsutil cp -r $ext /etc -# JUPYTER_EXTENSION_ARCHIVE=`basename $ext` -# docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} -# retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} -# elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then -# JUPYTER_EXTENSION_FILE=`basename $ext` -# curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} -# docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} -# retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} -# else -# retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh $ext -# fi -# done -# fi -# -# STEP_TIMINGS+=($(date +%s)) -# -# # See IA-1901: Jupyter UI stalls indefinitely on initial R kernel connection after cluster create/resume -# # The intent of this is to "warm up" R at VM creation time to hopefully prevent issues when the Jupyter -# # kernel tries to connect to it. 
-# docker exec $JUPYTER_SERVER_NAME /bin/bash -c "R -e '1+1'" || true -# -# # jupyter_delocalize.py now assumes welder's url is `http://welder:8080`, but on dataproc, we're still using host network -# # A better to do this might be to take welder host as an argument to the script -# docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' /etc/jupyter/custom/jupyter_delocalize.py" -# -# # In new jupyter images, we should update jupyter_notebook_config.py in terra-docker. -# # This is to make it so that older images will still work after we change notebooks location to home dir -# docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py -# -# # Copy gitignore into jupyter container -# docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global" -# -# # Install nbstripout and set gitignore in Git Config -# docker exec $JUPYTER_SERVER_NAME /bin/bash -c "pip install nbstripout \ -# && python -m nbstripout --install --global \ -# && git config --global core.excludesfile $JUPYTER_USER_HOME/gitignore_global" -# -# # Install the custom jupyter extensions needed to lock notebooks into edit or safe modes (required by AOU) -# docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_HOME/scripts/extension/install_jupyter_contrib_nbextensions.sh \ -# && mkdir -p $JUPYTER_USER_HOME/.jupyter/custom/ \ -# && cp $JUPYTER_HOME/custom/google_sign_in.js $JUPYTER_USER_HOME/.jupyter/custom/ \ -# && ls -la $JUPYTER_HOME/custom/extension_entry_jupyter.js \ -# && cp $JUPYTER_HOME/custom/extension_entry_jupyter.js $JUPYTER_USER_HOME/.jupyter/custom/custom.js \ -# && cp $JUPYTER_HOME/custom/safe-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ -# && cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ 
\ -# && mkdir -p $JUPYTER_HOME/nbconfig" -# -# log 'Starting Jupyter Notebook...' -# retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_SCRIPTS}/run-jupyter.sh ${NOTEBOOKS_DIR}" -# -# STEP_TIMINGS+=($(date +%s)) -# fi -# -# # RStudio specific setup; only do if RStudio is installed -# if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then -# EXIT_CODE=0 -# retry 3 docker exec ${RSTUDIO_SERVER_NAME} ${RSTUDIO_SCRIPTS}/set_up_package_dir.sh || EXIT_CODE=$? -# if [ $EXIT_CODE -ne 0 ]; then -# echo "RStudio user package installation directory creation failed, creating /packages directory" -# docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c "mkdir -p ${RSTUDIO_USER_HOME}/packages && chmod a+rwx ${RSTUDIO_USER_HOME}/packages" -# fi -# -# # Add the EVs specified in rstudio-docker-compose.yaml to Renviron.site -# retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'echo "GOOGLE_PROJECT=$GOOGLE_PROJECT -#CLUSTER_NAME=$CLUSTER_NAME -#RUNTIME_NAME=$RUNTIME_NAME -#OWNER_EMAIL=$OWNER_EMAIL -#SHOULD_BACKGROUND_SYNC=$SHOULD_BACKGROUND_SYNC" >> /usr/local/lib/R/etc/Renviron.site' -# -# # Add custom_env_vars.env to Renviron.site -# CUSTOM_ENV_VARS_FILE=/var/custom_env_vars.env -# if [ -f "$CUSTOM_ENV_VARS_FILE" ]; then -# retry 3 docker cp ${CUSTOM_ENV_VARS_FILE} ${RSTUDIO_SERVER_NAME}:/usr/local/lib/R/var/custom_env_vars.env -# retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'cat /usr/local/lib/R/var/custom_env_vars.env >> /usr/local/lib/R/etc/Renviron.site' -# fi -# -# # If a user script was specified, copy it into the docker container and execute it. -# if [ ! -z "$USER_SCRIPT_URI" ] ; then -# apply_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS -# fi -# -# # If a start user script was specified, copy it into the docker container for consumption during startups. -# if [ ! 
-z "$START_USER_SCRIPT_URI" ] ; then -# apply_start_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS -# fi -# -# # Start RStudio server -# retry 3 docker exec -d ${RSTUDIO_SERVER_NAME} /init + if [ ! -z "${JUPYTER_NB_EXTENSIONS}" ] ; then + for ext in ${JUPYTER_NB_EXTENSIONS} + do + log 'Installing Jupyter NB extension [$ext]...' + if [[ $ext == 'gs://'* ]]; then + gsutil cp $ext /etc + JUPYTER_EXTENSION_ARCHIVE=`basename $ext` + docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then + JUPYTER_EXTENSION_FILE=`basename $ext` + curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} + docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} + else + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_notebook_extension.sh $ext + fi + done + fi + + STEP_TIMINGS+=($(date +%s)) + + # Install serverExtensions if provided by the user + if [ ! -z "${JUPYTER_SERVER_EXTENSIONS}" ] ; then + for ext in ${JUPYTER_SERVER_EXTENSIONS} + do + log 'Installing Jupyter server extension [$ext]...' 
+ if [[ $ext == 'gs://'* ]]; then + gsutil cp $ext /etc + JUPYTER_EXTENSION_ARCHIVE=`basename $ext` + docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + else + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_server_extension.sh $ext + fi + done + fi + + STEP_TIMINGS+=($(date +%s)) + + # Install combined extensions if provided by the user + if [ ! -z "${JUPYTER_COMBINED_EXTENSIONS}" ] ; then + for ext in ${JUPYTER_COMBINED_EXTENSIONS} + do + log 'Installing Jupyter combined extension [$ext]...' + log $ext + if [[ $ext == 'gs://'* ]]; then + gsutil cp $ext /etc + JUPYTER_EXTENSION_ARCHIVE=`basename $ext` + docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE} + else + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_combined_extension.sh $ext + fi + done + fi + + STEP_TIMINGS+=($(date +%s)) + + # If a user script was specified, copy it into the docker container and execute it. + if [ ! -z "$USER_SCRIPT_URI" ] ; then + apply_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME + fi + + # done user script + STEP_TIMINGS+=($(date +%s)) + + # If a start user script was specified, copy it into the docker container for consumption during startups. + if [ ! 
-z "$START_USER_SCRIPT_URI" ] ; then + apply_start_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME + fi + + # done start user script + STEP_TIMINGS+=($(date +%s)) + + # Install lab extensions if provided by the user + # Note: lab extensions need to installed as jupyter user, not root + if [ ! -z "${JUPYTER_LAB_EXTENSIONS}" ] ; then + for ext in ${JUPYTER_LAB_EXTENSIONS} + do + log 'Installing JupyterLab extension [$ext]...' + pwd + if [[ $ext == 'gs://'* ]]; then + gsutil cp -r $ext /etc + JUPYTER_EXTENSION_ARCHIVE=`basename $ext` + docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then + JUPYTER_EXTENSION_FILE=`basename $ext` + curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} + docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} + retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} + else + retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh $ext + fi + done + fi + + STEP_TIMINGS+=($(date +%s)) + + # See IA-1901: Jupyter UI stalls indefinitely on initial R kernel connection after cluster create/resume + # The intent of this is to "warm up" R at VM creation time to hopefully prevent issues when the Jupyter + # kernel tries to connect to it. 
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "R -e '1+1'" || true + + # jupyter_delocalize.py now assumes welder's url is `http://welder:8080`, but on dataproc, we're still using host network + # A better to do this might be to take welder host as an argument to the script + docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' /etc/jupyter/custom/jupyter_delocalize.py" + + # In new jupyter images, we should update jupyter_notebook_config.py in terra-docker. + # This is to make it so that older images will still work after we change notebooks location to home dir + docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py + + # Copy gitignore into jupyter container + docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global" + + # Install nbstripout and set gitignore in Git Config + docker exec $JUPYTER_SERVER_NAME /bin/bash -c "pip install nbstripout \ + && python -m nbstripout --install --global \ + && git config --global core.excludesfile $JUPYTER_USER_HOME/gitignore_global" + + # Install the custom jupyter extensions needed to lock notebooks into edit or safe modes (required by AOU) + docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_HOME/scripts/extension/install_jupyter_contrib_nbextensions.sh \ + && mkdir -p $JUPYTER_USER_HOME/.jupyter/custom/ \ + && cp $JUPYTER_HOME/custom/google_sign_in.js $JUPYTER_USER_HOME/.jupyter/custom/ \ + && ls -la $JUPYTER_HOME/custom/extension_entry_jupyter.js \ + && cp $JUPYTER_HOME/custom/extension_entry_jupyter.js $JUPYTER_USER_HOME/.jupyter/custom/custom.js \ + && cp $JUPYTER_HOME/custom/safe-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ + && cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ + && mkdir -p 
$JUPYTER_HOME/nbconfig" + + log 'Starting Jupyter Notebook...' + retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_SCRIPTS}/run-jupyter.sh ${NOTEBOOKS_DIR}" + + STEP_TIMINGS+=($(date +%s)) + fi + + # RStudio specific setup; only do if RStudio is installed + if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then + EXIT_CODE=0 + retry 3 docker exec ${RSTUDIO_SERVER_NAME} ${RSTUDIO_SCRIPTS}/set_up_package_dir.sh || EXIT_CODE=$? + if [ $EXIT_CODE -ne 0 ]; then + echo "RStudio user package installation directory creation failed, creating /packages directory" + docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c "mkdir -p ${RSTUDIO_USER_HOME}/packages && chmod a+rwx ${RSTUDIO_USER_HOME}/packages" + fi + + # Add the EVs specified in rstudio-docker-compose.yaml to Renviron.site + retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'echo "GOOGLE_PROJECT=$GOOGLE_PROJECT +CLUSTER_NAME=$CLUSTER_NAME +RUNTIME_NAME=$RUNTIME_NAME +OWNER_EMAIL=$OWNER_EMAIL +SHOULD_BACKGROUND_SYNC=$SHOULD_BACKGROUND_SYNC" >> /usr/local/lib/R/etc/Renviron.site' + + # Add custom_env_vars.env to Renviron.site + CUSTOM_ENV_VARS_FILE=/var/custom_env_vars.env + if [ -f "$CUSTOM_ENV_VARS_FILE" ]; then + retry 3 docker cp ${CUSTOM_ENV_VARS_FILE} ${RSTUDIO_SERVER_NAME}:/usr/local/lib/R/var/custom_env_vars.env + retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'cat /usr/local/lib/R/var/custom_env_vars.env >> /usr/local/lib/R/etc/Renviron.site' + fi + + # If a user script was specified, copy it into the docker container and execute it. + if [ ! -z "$USER_SCRIPT_URI" ] ; then + apply_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS + fi + + # If a start user script was specified, copy it into the docker container for consumption during startups. + if [ ! 
-z "$START_USER_SCRIPT_URI" ] ; then + apply_start_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS + fi + + # Start RStudio server + retry 3 docker exec -d ${RSTUDIO_SERVER_NAME} /init fi # Remove any unneeded cached images to save disk space. From 4f153c3c9aafc1974235d1b02677c6419c1f5aba Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Wed, 30 Apr 2025 10:00:03 -0400 Subject: [PATCH 15/35] continue bypassing network connectivuty issues --- .../resources/init-resources/init-actions.sh | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh index b791c3da3bc..a169c5a9969 100644 --- a/http/src/main/resources/init-resources/init-actions.sh +++ b/http/src/main/resources/init-resources/init-actions.sh @@ -525,23 +525,23 @@ EOF # This is to make it so that older images will still work after we change notebooks location to home dir docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py - # Copy gitignore into jupyter container - docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global" - - # Install nbstripout and set gitignore in Git Config - docker exec $JUPYTER_SERVER_NAME /bin/bash -c "pip install nbstripout \ - && python -m nbstripout --install --global \ - && git config --global core.excludesfile $JUPYTER_USER_HOME/gitignore_global" - - # Install the custom jupyter extensions needed to lock notebooks into edit or safe modes (required by AOU) - docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_HOME/scripts/extension/install_jupyter_contrib_nbextensions.sh \ - && mkdir -p $JUPYTER_USER_HOME/.jupyter/custom/ \ - && cp $JUPYTER_HOME/custom/google_sign_in.js 
$JUPYTER_USER_HOME/.jupyter/custom/ \ - && ls -la $JUPYTER_HOME/custom/extension_entry_jupyter.js \ - && cp $JUPYTER_HOME/custom/extension_entry_jupyter.js $JUPYTER_USER_HOME/.jupyter/custom/custom.js \ - && cp $JUPYTER_HOME/custom/safe-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ - && cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ - && mkdir -p $JUPYTER_HOME/nbconfig" +# # Copy gitignore into jupyter container +# docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global" +# +# # Install nbstripout and set gitignore in Git Config +# docker exec $JUPYTER_SERVER_NAME /bin/bash -c "pip install nbstripout \ +# && python -m nbstripout --install --global \ +# && git config --global core.excludesfile $JUPYTER_USER_HOME/gitignore_global" +# +# # Install the custom jupyter extensions needed to lock notebooks into edit or safe modes (required by AOU) +# docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_HOME/scripts/extension/install_jupyter_contrib_nbextensions.sh \ +# && mkdir -p $JUPYTER_USER_HOME/.jupyter/custom/ \ +# && cp $JUPYTER_HOME/custom/google_sign_in.js $JUPYTER_USER_HOME/.jupyter/custom/ \ +# && ls -la $JUPYTER_HOME/custom/extension_entry_jupyter.js \ +# && cp $JUPYTER_HOME/custom/extension_entry_jupyter.js $JUPYTER_USER_HOME/.jupyter/custom/custom.js \ +# && cp $JUPYTER_HOME/custom/safe-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ +# && cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ +# && mkdir -p $JUPYTER_HOME/nbconfig" log 'Starting Jupyter Notebook...' 
retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_SCRIPTS}/run-jupyter.sh ${NOTEBOOKS_DIR}" From d069eb6788971236676b86cff2e6940943c3baa4 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Wed, 30 Apr 2025 15:44:29 -0400 Subject: [PATCH 16/35] match ipc mode with previous settings --- http/src/main/resources/init-resources/proxy-docker-compose.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/http/src/main/resources/init-resources/proxy-docker-compose.yaml b/http/src/main/resources/init-resources/proxy-docker-compose.yaml index 902a8613980..932c6bfbfc9 100644 --- a/http/src/main/resources/init-resources/proxy-docker-compose.yaml +++ b/http/src/main/resources/init-resources/proxy-docker-compose.yaml @@ -4,6 +4,7 @@ services: container_name: "${PROXY_SERVER_NAME}" image: "mirror.gcr.io/${PROXY_DOCKER_IMAGE}" network_mode: host + ipc: shareable volumes: - ${CERT_DIRECTORY}/jupyter-server.crt:/etc/ssl/certs/server.crt:ro - ${CERT_DIRECTORY}/jupyter-server.key:/etc/ssl/private/server.key:ro From ef8ac936b9da7f5d54e698945fedfe0f2faab625 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Wed, 30 Apr 2025 17:31:34 -0400 Subject: [PATCH 17/35] start from blank init action --- .../resources/init-resources/init-actions.sh | 634 ++++++++---------- 1 file changed, 292 insertions(+), 342 deletions(-) diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh index a169c5a9969..42889a593a0 100644 --- a/http/src/main/resources/init-resources/init-actions.sh +++ b/http/src/main/resources/init-resources/init-actions.sh @@ -148,28 +148,17 @@ STEP_TIMINGS=($(date +%s)) # See https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/configuration#default. # Detect dataproc image version from its various names -if (! 
test -v DATAPROC_IMAGE_VERSION) && test -v DATAPROC_VERSION; then - DATAPROC_IMAGE_VERSION="${DATAPROC_VERSION}" -fi - -if [[ $(echo "${DATAPROC_IMAGE_VERSION} < 2.2" | bc -l) == 1 ]]; then - echo "This Dataproc cluster node runs image version ${DATAPROC_IMAGE_VERSION} with pre-installed legacy monitoring agent. Skipping Ops Agent installation." - exit 0 -fi - -curl -sSO https://dl.google.com/cloudagents/add-google-cloud-ops-agent-repo.sh -bash add-google-cloud-ops-agent-repo.sh --also-install -# -#cat <> /etc/google-cloud-ops-agent/config.yaml -#logging: -# service: -# pipelines: -# default_pipeline: -# receivers: [] -#EOF +#if (! test -v DATAPROC_IMAGE_VERSION) && test -v DATAPROC_VERSION; then +# DATAPROC_IMAGE_VERSION="${DATAPROC_VERSION}" +#fi +# +#if [[ $(echo "${DATAPROC_IMAGE_VERSION} < 2.2" | bc -l) == 1 ]]; then +# echo "This Dataproc cluster node runs image version ${DATAPROC_IMAGE_VERSION} with pre-installed legacy monitoring agent. Skipping Ops Agent installation." +# exit 0 +#fi # -#systemctl restart google-cloud-ops-agent -### +#curl -sSO https://dl.google.com/cloudagents/add-google-cloud-ops-agent-repo.sh +#bash add-google-cloud-ops-agent-repo.sh --also-install # temp workaround for https://github.com/docker/compose/issues/5930 export CLOUDSDK_PYTHON=python3 @@ -232,7 +221,7 @@ if [[ "${ROLE}" == 'Master' ]]; then WELDER_DOCKER_COMPOSE=$(welderDockerCompose) PROXY_SITE_CONF=$(proxySiteConf) JUPYTER_SERVER_EXTENSIONS=$(jupyterServerExtensions) - JUPYTER_NB_EXTENSIONS=https://app.terra.bio/jupyter-iframe-extension.js #$(jupyterNbExtensions) + JUPYTER_NB_EXTENSIONS=$(jupyterNbExtensions) JUPYTER_COMBINED_EXTENSIONS=$(jupyterCombinedExtensions) JUPYTER_LAB_EXTENSIONS=$(jupyterLabExtensions) USER_SCRIPT_URI=$(userScriptUri) @@ -247,284 +236,245 @@ if [[ "${ROLE}" == 'Master' ]]; then INIT_BUCKET_NAME=$(initBucketName) STEP_TIMINGS+=($(date +%s)) - - log 'Copying secrets from GCS...' 
- - mkdir /work - mkdir /certs - chmod a+rwx /work - - # Add the certificates from the bucket to the VM. They are used by the docker-compose file - gsutil cp ${SERVER_CRT} /certs - gsutil cp ${SERVER_KEY} /certs - gsutil cp ${ROOT_CA} /certs - gsutil cp gs://${INIT_BUCKET_NAME}/* ${DOCKER_COMPOSE_FILES_DIRECTORY} - - - # GCP connector is used by dataproc to connect with the staging bucket to read the logs - touch /hadoop_gcs_connector_metadata_cache - touch auth_openidc.conf - - - # Add ops agent configuration for welder, jupyter, user startup and shutdown scripts - cat <> /etc/google-cloud-ops-agent/config.yaml - logging: - receivers: - welder: - type: files - include_paths: [/work/welder.log] - jupyter: - type: files - include_paths: [/work/jupyter.log] - daemon: - type: files - include_paths: [/var/log/daemon.log] - service: - pipelines: - default_pipeline: - receivers: [welder, jupyter, daemon] -EOF - systemctl restart google-cloud-ops-agent - - ## Note that the stack driver configuration is changing in later versions of Dataproc, see https://broadworkbench.atlassian.net/browse/IA-5023 - # Add stack driver configuration for welder -# tee /etc/google-fluentd/config.d/welder.conf << END -# -# @type tail -# format json -# path /work/welder.log -# pos_file /var/tmp/fluentd.welder.pos -# read_from_head true -# tag welder -# -#END -# -# # Add stack driver configuration for jupyter -# tee /etc/google-fluentd/config.d/jupyter.conf << END -# -# @type tail -# format none -# path /work/jupyter.log -# pos_file /var/tmp/fluentd.jupyter.pos -# read_from_head true -# tag jupyter -# -#END -# -# # Add stack driver configuration for user startup and shutdown scripts -# tee /etc/google-fluentd/config.d/daemon.conf << END -# -# @type tail -# format none -# path /var/log/daemon.log -# pos_file /var/tmp/fluentd.google.user.daemon.pos -# read_from_head true -# tag daemon -# -#END -# -# service google-fluentd reload - - # Install env var config - if [ ! 
-z ${CUSTOM_ENV_VARS_CONFIG_URI} ] ; then - log 'Copy custom env vars config...' - gsutil cp ${CUSTOM_ENV_VARS_CONFIG_URI} /var - fi - - - # If any image is hosted in a GAR registry (detected by regex) then - # authorize docker to interact with gcr.io. - # NOTE: GCR images are now hosted on GAR, but the file paths haven't changed, they automatically redirect. - if grep -qF "gcr.io" <<< "${JUPYTER_DOCKER_IMAGE}${RSTUDIO_DOCKER_IMAGE}${PROXY_DOCKER_IMAGE}${WELDER_DOCKER_IMAGE}" ; then - log 'Authorizing GCR/GAR...' - gcloud auth configure-docker - fi - - STEP_TIMINGS+=($(date +%s)) - - log 'Starting up the Jupyter docker...' - - # Run docker-compose for each specified compose file. - # Note the `docker-compose pull` is retried to avoid intermittent network errors, but - # `docker-compose up` is not retried. - COMPOSE_FILES=(-f /etc/`basename ${PROXY_DOCKER_COMPOSE}`) - - cat /etc/`basename ${PROXY_DOCKER_COMPOSE}` - - if [ ! -z ${WELDER_DOCKER_IMAGE} ] && [ "${WELDER_ENABLED}" == "true" ] ; then - COMPOSE_FILES+=(-f /etc/`basename ${WELDER_DOCKER_COMPOSE}`) - cat /etc/`basename ${WELDER_DOCKER_COMPOSE}` - fi - - if [ ! -z ${JUPYTER_DOCKER_IMAGE} ] ; then - TOOL_SERVER_NAME=${JUPYTER_SERVER_NAME} - COMPOSE_FILES+=(-f /etc/`basename ${JUPYTER_DOCKER_COMPOSE}`) - cat /etc/`basename ${JUPYTER_DOCKER_COMPOSE}` - fi - - if [ ! -z ${RSTUDIO_DOCKER_IMAGE} ] ; then - TOOL_SERVER_NAME=${RSTUDIO_SERVER_NAME} - COMPOSE_FILES+=(-f /etc/`basename ${RSTUDIO_DOCKER_COMPOSE}`) - cat /etc/`basename ${RSTUDIO_DOCKER_COMPOSE}` - fi - - retry 5 docker-compose "${COMPOSE_FILES[@]}" config - - # restart docker - systemctl restart docker - - retry 5 docker-compose "${COMPOSE_FILES[@]}" pull - retry 5 docker-compose "${COMPOSE_FILES[@]}" up -d - - # Start up crypto detector, if enabled. - # This should be started after other containers. - # Use `docker run` instead of docker-compose so we can link it to the Jupyter/RStudio container's network. 
- # See https://github.com/broadinstitute/terra-cryptomining-security-alerts/tree/master/v2 - if [ ! -z "$CRYPTO_DETECTOR_DOCKER_IMAGE" ] ; then - docker run --name=${CRYPTO_DETECTOR_SERVER_NAME} --rm -d \ - --net=container:${TOOL_SERVER_NAME} ${CRYPTO_DETECTOR_DOCKER_IMAGE} - fi - - STEP_TIMINGS+=($(date +%s)) - - # Jupyter-specific setup, only do if Jupyter is installed - if [ ! -z ${JUPYTER_DOCKER_IMAGE} ] ; then - log 'Installing Jupydocker kernelspecs...' - - # Install notebook.json - if [ ! -z ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} ] ; then - log 'Copy Jupyter frontend notebook config...' - gsutil cp ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} /etc - JUPYTER_NOTEBOOK_FRONTEND_CONFIG=`basename ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI}` - retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "mkdir -p $JUPYTER_HOME/nbconfig" - docker cp /etc/${JUPYTER_NOTEBOOK_FRONTEND_CONFIG} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/nbconfig/ - fi - - STEP_TIMINGS+=($(date +%s)) - - # Install NbExtensions. These are user-specified Jupyter extensions. - # For instance Terra UI is passing - # { - # "nbExtensions": { - # "saturn-iframe-extension": "https://bvdp-saturn-dev.appspot.com/jupyter-iframe-extension.js" - # }, - # "labExtensions": {}, - # "serverExtensions": {}, - # "combinedExtensions": {} - # } - if [ ! -z "${JUPYTER_NB_EXTENSIONS}" ] ; then - for ext in ${JUPYTER_NB_EXTENSIONS} - do - log 'Installing Jupyter NB extension [$ext]...' 
- if [[ $ext == 'gs://'* ]]; then - gsutil cp $ext /etc - JUPYTER_EXTENSION_ARCHIVE=`basename $ext` - docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} - retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} - elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then - JUPYTER_EXTENSION_FILE=`basename $ext` - curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} - docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} - retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} - else - retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_notebook_extension.sh $ext - fi - done - fi - - STEP_TIMINGS+=($(date +%s)) - - # Install serverExtensions if provided by the user - if [ ! -z "${JUPYTER_SERVER_EXTENSIONS}" ] ; then - for ext in ${JUPYTER_SERVER_EXTENSIONS} - do - log 'Installing Jupyter server extension [$ext]...' - if [[ $ext == 'gs://'* ]]; then - gsutil cp $ext /etc - JUPYTER_EXTENSION_ARCHIVE=`basename $ext` - docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} - retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} - else - retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_server_extension.sh $ext - fi - done - fi - - STEP_TIMINGS+=($(date +%s)) - - # Install combined extensions if provided by the user - if [ ! 
-z "${JUPYTER_COMBINED_EXTENSIONS}" ] ; then - for ext in ${JUPYTER_COMBINED_EXTENSIONS} - do - log 'Installing Jupyter combined extension [$ext]...' - log $ext - if [[ $ext == 'gs://'* ]]; then - gsutil cp $ext /etc - JUPYTER_EXTENSION_ARCHIVE=`basename $ext` - docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} - retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE} - else - retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_combined_extension.sh $ext - fi - done - fi - - STEP_TIMINGS+=($(date +%s)) - - # If a user script was specified, copy it into the docker container and execute it. - if [ ! -z "$USER_SCRIPT_URI" ] ; then - apply_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME - fi - - # done user script - STEP_TIMINGS+=($(date +%s)) - - # If a start user script was specified, copy it into the docker container for consumption during startups. - if [ ! -z "$START_USER_SCRIPT_URI" ] ; then - apply_start_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME - fi - - # done start user script - STEP_TIMINGS+=($(date +%s)) - - # Install lab extensions if provided by the user - # Note: lab extensions need to installed as jupyter user, not root - if [ ! -z "${JUPYTER_LAB_EXTENSIONS}" ] ; then - for ext in ${JUPYTER_LAB_EXTENSIONS} - do - log 'Installing JupyterLab extension [$ext]...' 
- pwd - if [[ $ext == 'gs://'* ]]; then - gsutil cp -r $ext /etc - JUPYTER_EXTENSION_ARCHIVE=`basename $ext` - docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} - retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} - elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then - JUPYTER_EXTENSION_FILE=`basename $ext` - curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} - docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} - retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} - else - retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh $ext - fi - done - fi - - STEP_TIMINGS+=($(date +%s)) - - # See IA-1901: Jupyter UI stalls indefinitely on initial R kernel connection after cluster create/resume - # The intent of this is to "warm up" R at VM creation time to hopefully prevent issues when the Jupyter - # kernel tries to connect to it. - docker exec $JUPYTER_SERVER_NAME /bin/bash -c "R -e '1+1'" || true - - # jupyter_delocalize.py now assumes welder's url is `http://welder:8080`, but on dataproc, we're still using host network - # A better to do this might be to take welder host as an argument to the script - docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' /etc/jupyter/custom/jupyter_delocalize.py" - - # In new jupyter images, we should update jupyter_notebook_config.py in terra-docker. 
- # This is to make it so that older images will still work after we change notebooks location to home dir - docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py - +# +# log 'Copying secrets from GCS...' +# +# mkdir /work +# mkdir /certs +# chmod a+rwx /work +# +# # Add the certificates from the bucket to the VM. They are used by the docker-compose file +# gsutil cp ${SERVER_CRT} /certs +# gsutil cp ${SERVER_KEY} /certs +# gsutil cp ${ROOT_CA} /certs +# gsutil cp gs://${INIT_BUCKET_NAME}/* ${DOCKER_COMPOSE_FILES_DIRECTORY} +# +# +# # GCP connector is used by dataproc to connect with the staging bucket to read the logs +# touch /hadoop_gcs_connector_metadata_cache +# touch auth_openidc.conf +# +# +# # Add ops agent configuration for welder, jupyter, user startup and shutdown scripts +# cat <> /etc/google-cloud-ops-agent/config.yaml +# logging: +# receivers: +# welder: +# type: files +# include_paths: [/work/welder.log] +# jupyter: +# type: files +# include_paths: [/work/jupyter.log] +# daemon: +# type: files +# include_paths: [/var/log/daemon.log] +# service: +# pipelines: +# default_pipeline: +# receivers: [welder, jupyter, daemon] +#EOF +# systemctl restart google-cloud-ops-agent +# +# # Install env var config +# if [ ! -z ${CUSTOM_ENV_VARS_CONFIG_URI} ] ; then +# log 'Copy custom env vars config...' +# gsutil cp ${CUSTOM_ENV_VARS_CONFIG_URI} /var +# fi +# +# +# # If any image is hosted in a GAR registry (detected by regex) then +# # authorize docker to interact with gcr.io. +# # NOTE: GCR images are now hosted on GAR, but the file paths haven't changed, they automatically redirect. +# if grep -qF "gcr.io" <<< "${JUPYTER_DOCKER_IMAGE}${RSTUDIO_DOCKER_IMAGE}${PROXY_DOCKER_IMAGE}${WELDER_DOCKER_IMAGE}" ; then +# log 'Authorizing GCR/GAR...' +# gcloud auth configure-docker +# fi +# +# STEP_TIMINGS+=($(date +%s)) +# +# log 'Starting up the Jupyter docker...' 
+# +# # Run docker-compose for each specified compose file. +# # Note the `docker-compose pull` is retried to avoid intermittent network errors, but +# # `docker-compose up` is not retried. +# COMPOSE_FILES=(-f /etc/`basename ${PROXY_DOCKER_COMPOSE}`) +# +# cat /etc/`basename ${PROXY_DOCKER_COMPOSE}` +# +# if [ ! -z ${WELDER_DOCKER_IMAGE} ] && [ "${WELDER_ENABLED}" == "true" ] ; then +# COMPOSE_FILES+=(-f /etc/`basename ${WELDER_DOCKER_COMPOSE}`) +# cat /etc/`basename ${WELDER_DOCKER_COMPOSE}` +# fi +# +# if [ ! -z ${JUPYTER_DOCKER_IMAGE} ] ; then +# TOOL_SERVER_NAME=${JUPYTER_SERVER_NAME} +# COMPOSE_FILES+=(-f /etc/`basename ${JUPYTER_DOCKER_COMPOSE}`) +# cat /etc/`basename ${JUPYTER_DOCKER_COMPOSE}` +# fi +# +# if [ ! -z ${RSTUDIO_DOCKER_IMAGE} ] ; then +# TOOL_SERVER_NAME=${RSTUDIO_SERVER_NAME} +# COMPOSE_FILES+=(-f /etc/`basename ${RSTUDIO_DOCKER_COMPOSE}`) +# cat /etc/`basename ${RSTUDIO_DOCKER_COMPOSE}` +# fi +# +# retry 5 docker-compose "${COMPOSE_FILES[@]}" config +# +# # restart docker +# systemctl restart docker +# +# retry 5 docker-compose "${COMPOSE_FILES[@]}" pull +# retry 5 docker-compose "${COMPOSE_FILES[@]}" up -d +# +# # Start up crypto detector, if enabled. +# # This should be started after other containers. +# # Use `docker run` instead of docker-compose so we can link it to the Jupyter/RStudio container's network. +# # See https://github.com/broadinstitute/terra-cryptomining-security-alerts/tree/master/v2 +# if [ ! -z "$CRYPTO_DETECTOR_DOCKER_IMAGE" ] ; then +# docker run --name=${CRYPTO_DETECTOR_SERVER_NAME} --rm -d \ +# --net=container:${TOOL_SERVER_NAME} ${CRYPTO_DETECTOR_DOCKER_IMAGE} +# fi +# +# STEP_TIMINGS+=($(date +%s)) +# +# # Jupyter-specific setup, only do if Jupyter is installed +# if [ ! -z ${JUPYTER_DOCKER_IMAGE} ] ; then +# log 'Installing Jupydocker kernelspecs...' +# +# # Install notebook.json +# if [ ! -z ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} ] ; then +# log 'Copy Jupyter frontend notebook config...' 
+# gsutil cp ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} /etc +# JUPYTER_NOTEBOOK_FRONTEND_CONFIG=`basename ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI}` +# retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "mkdir -p $JUPYTER_HOME/nbconfig" +# docker cp /etc/${JUPYTER_NOTEBOOK_FRONTEND_CONFIG} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/nbconfig/ +# fi +# +# STEP_TIMINGS+=($(date +%s)) +# +# # Install NbExtensions. These are user-specified Jupyter extensions. +# # For instance Terra UI is passing +# # { +# # "nbExtensions": { +# # "saturn-iframe-extension": "https://bvdp-saturn-dev.appspot.com/jupyter-iframe-extension.js" +# # }, +# # "labExtensions": {}, +# # "serverExtensions": {}, +# # "combinedExtensions": {} +# # } +# if [ ! -z "${JUPYTER_NB_EXTENSIONS}" ] ; then +# for ext in ${JUPYTER_NB_EXTENSIONS} +# do +# log 'Installing Jupyter NB extension [$ext]...' +# if [[ $ext == 'gs://'* ]]; then +# gsutil cp $ext /etc +# JUPYTER_EXTENSION_ARCHIVE=`basename $ext` +# docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} +# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} +# elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then +# JUPYTER_EXTENSION_FILE=`basename $ext` +# curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} +# docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} +# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} +# else +# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_notebook_extension.sh $ext +# fi +# done +# fi +# +# STEP_TIMINGS+=($(date +%s)) +# +# # Install serverExtensions if provided by the user +# if [ ! 
-z "${JUPYTER_SERVER_EXTENSIONS}" ] ; then +# for ext in ${JUPYTER_SERVER_EXTENSIONS} +# do +# log 'Installing Jupyter server extension [$ext]...' +# if [[ $ext == 'gs://'* ]]; then +# gsutil cp $ext /etc +# JUPYTER_EXTENSION_ARCHIVE=`basename $ext` +# docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} +# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} +# else +# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_server_extension.sh $ext +# fi +# done +# fi +# +# STEP_TIMINGS+=($(date +%s)) +# +# # Install combined extensions if provided by the user +# if [ ! -z "${JUPYTER_COMBINED_EXTENSIONS}" ] ; then +# for ext in ${JUPYTER_COMBINED_EXTENSIONS} +# do +# log 'Installing Jupyter combined extension [$ext]...' +# log $ext +# if [[ $ext == 'gs://'* ]]; then +# gsutil cp $ext /etc +# JUPYTER_EXTENSION_ARCHIVE=`basename $ext` +# docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} +# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE} +# else +# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_combined_extension.sh $ext +# fi +# done +# fi +# +# STEP_TIMINGS+=($(date +%s)) +# +# # If a user script was specified, copy it into the docker container and execute it. +# if [ ! -z "$USER_SCRIPT_URI" ] ; then +# apply_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME +# fi +# +# # done user script +# STEP_TIMINGS+=($(date +%s)) +# +# # If a start user script was specified, copy it into the docker container for consumption during startups. +# if [ ! 
-z "$START_USER_SCRIPT_URI" ] ; then +# apply_start_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME +# fi +# +# # done start user script +# STEP_TIMINGS+=($(date +%s)) +# +# # Install lab extensions if provided by the user +# # Note: lab extensions need to installed as jupyter user, not root +# if [ ! -z "${JUPYTER_LAB_EXTENSIONS}" ] ; then +# for ext in ${JUPYTER_LAB_EXTENSIONS} +# do +# log 'Installing JupyterLab extension [$ext]...' +# pwd +# if [[ $ext == 'gs://'* ]]; then +# gsutil cp -r $ext /etc +# JUPYTER_EXTENSION_ARCHIVE=`basename $ext` +# docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} +# retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} +# elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then +# JUPYTER_EXTENSION_FILE=`basename $ext` +# curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} +# docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} +# retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} +# else +# retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh $ext +# fi +# done +# fi +# +# STEP_TIMINGS+=($(date +%s)) +# +# # See IA-1901: Jupyter UI stalls indefinitely on initial R kernel connection after cluster create/resume +# # The intent of this is to "warm up" R at VM creation time to hopefully prevent issues when the Jupyter +# # kernel tries to connect to it. 
+# docker exec $JUPYTER_SERVER_NAME /bin/bash -c "R -e '1+1'" || true +# +# # jupyter_delocalize.py now assumes welder's url is `http://welder:8080`, but on dataproc, we're still using host network +# # A better to do this might be to take welder host as an argument to the script +# docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' /etc/jupyter/custom/jupyter_delocalize.py" +# +# # In new jupyter images, we should update jupyter_notebook_config.py in terra-docker. +# # This is to make it so that older images will still work after we change notebooks location to home dir +# docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py +# # # Copy gitignore into jupyter container # docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global" # @@ -542,49 +492,49 @@ EOF # && cp $JUPYTER_HOME/custom/safe-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ # && cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ # && mkdir -p $JUPYTER_HOME/nbconfig" +# +# log 'Starting Jupyter Notebook...' +# retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_SCRIPTS}/run-jupyter.sh ${NOTEBOOKS_DIR}" - log 'Starting Jupyter Notebook...' - retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_SCRIPTS}/run-jupyter.sh ${NOTEBOOKS_DIR}" - - STEP_TIMINGS+=($(date +%s)) - fi - - # RStudio specific setup; only do if RStudio is installed - if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then - EXIT_CODE=0 - retry 3 docker exec ${RSTUDIO_SERVER_NAME} ${RSTUDIO_SCRIPTS}/set_up_package_dir.sh || EXIT_CODE=$? 
- if [ $EXIT_CODE -ne 0 ]; then - echo "RStudio user package installation directory creation failed, creating /packages directory" - docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c "mkdir -p ${RSTUDIO_USER_HOME}/packages && chmod a+rwx ${RSTUDIO_USER_HOME}/packages" - fi - - # Add the EVs specified in rstudio-docker-compose.yaml to Renviron.site - retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'echo "GOOGLE_PROJECT=$GOOGLE_PROJECT -CLUSTER_NAME=$CLUSTER_NAME -RUNTIME_NAME=$RUNTIME_NAME -OWNER_EMAIL=$OWNER_EMAIL -SHOULD_BACKGROUND_SYNC=$SHOULD_BACKGROUND_SYNC" >> /usr/local/lib/R/etc/Renviron.site' - - # Add custom_env_vars.env to Renviron.site - CUSTOM_ENV_VARS_FILE=/var/custom_env_vars.env - if [ -f "$CUSTOM_ENV_VARS_FILE" ]; then - retry 3 docker cp ${CUSTOM_ENV_VARS_FILE} ${RSTUDIO_SERVER_NAME}:/usr/local/lib/R/var/custom_env_vars.env - retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'cat /usr/local/lib/R/var/custom_env_vars.env >> /usr/local/lib/R/etc/Renviron.site' - fi - - # If a user script was specified, copy it into the docker container and execute it. - if [ ! -z "$USER_SCRIPT_URI" ] ; then - apply_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS - fi - - # If a start user script was specified, copy it into the docker container for consumption during startups. - if [ ! -z "$START_USER_SCRIPT_URI" ] ; then - apply_start_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS - fi - - # Start RStudio server - retry 3 docker exec -d ${RSTUDIO_SERVER_NAME} /init - fi +# STEP_TIMINGS+=($(date +%s)) +# fi +# +# # RStudio specific setup; only do if RStudio is installed +# if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then +# EXIT_CODE=0 +# retry 3 docker exec ${RSTUDIO_SERVER_NAME} ${RSTUDIO_SCRIPTS}/set_up_package_dir.sh || EXIT_CODE=$? 
+# if [ $EXIT_CODE -ne 0 ]; then +# echo "RStudio user package installation directory creation failed, creating /packages directory" +# docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c "mkdir -p ${RSTUDIO_USER_HOME}/packages && chmod a+rwx ${RSTUDIO_USER_HOME}/packages" +# fi +# +# # Add the EVs specified in rstudio-docker-compose.yaml to Renviron.site +# retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'echo "GOOGLE_PROJECT=$GOOGLE_PROJECT +#CLUSTER_NAME=$CLUSTER_NAME +#RUNTIME_NAME=$RUNTIME_NAME +#OWNER_EMAIL=$OWNER_EMAIL +#SHOULD_BACKGROUND_SYNC=$SHOULD_BACKGROUND_SYNC" >> /usr/local/lib/R/etc/Renviron.site' +# +# # Add custom_env_vars.env to Renviron.site +# CUSTOM_ENV_VARS_FILE=/var/custom_env_vars.env +# if [ -f "$CUSTOM_ENV_VARS_FILE" ]; then +# retry 3 docker cp ${CUSTOM_ENV_VARS_FILE} ${RSTUDIO_SERVER_NAME}:/usr/local/lib/R/var/custom_env_vars.env +# retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'cat /usr/local/lib/R/var/custom_env_vars.env >> /usr/local/lib/R/etc/Renviron.site' +# fi +# +# # If a user script was specified, copy it into the docker container and execute it. +# if [ ! -z "$USER_SCRIPT_URI" ] ; then +# apply_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS +# fi +# +# # If a start user script was specified, copy it into the docker container for consumption during startups. +# if [ ! -z "$START_USER_SCRIPT_URI" ] ; then +# apply_start_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS +# fi +# +# # Start RStudio server +# retry 3 docker exec -d ${RSTUDIO_SERVER_NAME} /init +# fi # Remove any unneeded cached images to save disk space. 
# Do this asynchronously so it doesn't hold up cluster creation From d759fee245ea0b7ce4b210494d57e664644e11f3 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Thu, 1 May 2025 10:07:47 -0400 Subject: [PATCH 18/35] cache new hail and aou images compatible with dataproc 2.2.x --- automation/src/test/resources/reference.conf | 4 ++-- ...pare-custom-leonardo-jupyter-dataproc-image.sh | 15 ++++++++------- jenkins/gce-custom-images/prepare_gce_image.sh | 5 +++-- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/automation/src/test/resources/reference.conf b/automation/src/test/resources/reference.conf index ce5cdd59654..110051c3b8a 100644 --- a/automation/src/test/resources/reference.conf +++ b/automation/src/test/resources/reference.conf @@ -1,9 +1,9 @@ leonardo { rImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-r:2.2.7" pythonImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:1.1.6" - hailImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-hail:1.1.13" + hailImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-hail:1.1.14" gatkImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-gatk:2.3.9" - aouImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.15" + aouImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.16" baseImageUrl = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-base:1.1.4" gcrWelderUri = "us.gcr.io/broad-dsp-gcr-public/welder-server" dockerHubWelderUri = "broadinstitute/welder-server" diff --git a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh index 3e6db4973c7..20e4ff32158 100755 --- a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh +++ b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh @@ -16,19 +16,20 @@ set -e -x # the image tags are set via jenkins automation # 
-terra_jupyter_python="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:1.1.5" -terra_jupyter_r="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-r:2.2.6" -terra_jupyter_bioconductor="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-bioconductor:2.2.6" -terra_jupyter_hail="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-hail:1.1.11" -terra_jupyter_gatk="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-gatk:2.3.8" -terra_jupyter_aou="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.14" +terra_jupyter_python="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:1.1.6" +terra_jupyter_r="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-r:2.2.7" +terra_jupyter_bioconductor="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-bioconductor:2.2.7" +terra_jupyter_hail="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-hail:1.1.14" +terra_jupyter_gatk="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-gatk:2.3.9" +terra_jupyter_aou="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.16" welder_server="us.gcr.io/broad-dsp-gcr-public/welder-server:8667bfe" openidc_proxy="broadinstitute/openidc-proxy:2.3.1_2" anvil_rstudio_bioconductor="us.gcr.io/broad-dsp-gcr-public/anvil-rstudio-bioconductor:3.20.1" # Note that this is the version used currently by AOU in production, the one above can be staged for testing -terra_jupyter_aou_old="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.13" +# AN-503: Note that AOU 2.2.15 is using a hail version that still requires dataproc 2.1.x, which won't be provided by leonardo anymore +terra_jupyter_aou_old="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.15" # If you change this you must also change Leo reference.conf! 
cryptomining_detector="us.gcr.io/broad-dsp-gcr-public/cryptomining-detector:0.0.2" diff --git a/jenkins/gce-custom-images/prepare_gce_image.sh b/jenkins/gce-custom-images/prepare_gce_image.sh index 185616c8fe3..9db400dc63d 100755 --- a/jenkins/gce-custom-images/prepare_gce_image.sh +++ b/jenkins/gce-custom-images/prepare_gce_image.sh @@ -20,13 +20,14 @@ terra_jupyter_python="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:1.1.6" terra_jupyter_r="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-r:2.2.7" terra_jupyter_bioconductor="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-bioconductor:2.2.7" terra_jupyter_gatk="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-gatk:2.3.9" -terra_jupyter_aou="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.15" +terra_jupyter_aou="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.16" welder_server="us.gcr.io/broad-dsp-gcr-public/welder-server:8667bfe" openidc_proxy="broadinstitute/openidc-proxy:2.3.1_2" anvil_rstudio_bioconductor="us.gcr.io/broad-dsp-gcr-public/anvil-rstudio-bioconductor:3.20.1" # Note that this is the version used currently by AOU in production, the one above can be staged for testing -terra_jupyter_aou_old="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.14" +# AN-503: Note that AOU 2.2.15 is using a hail version that still requires dataproc 2.1.x, which won't be provided by leonardo anymore +terra_jupyter_aou_old="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.15" cos_gpu_installer="gcr.io/cos-cloud/cos-gpu-installer:v2.1.9" google_cloud_toolbox="us.gcr.io/cos-cloud/toolbox:v20230714" From 1efedaa1ead985195869a50343e35248bf5316d1 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Thu, 1 May 2025 16:58:10 -0400 Subject: [PATCH 19/35] specify InternalIpOnly flag to False for dataproc --- .../resources/init-resources/init-actions.sh | 622 +++++++++--------- .../leonardo/util/DataprocInterpreter.scala | 4 +- 2 files changed, 314 insertions(+), 312 deletions(-) diff --git 
a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh index 42889a593a0..96ec09fe2ae 100644 --- a/http/src/main/resources/init-resources/init-actions.sh +++ b/http/src/main/resources/init-resources/init-actions.sh @@ -138,7 +138,7 @@ function apply_start_user_script() { STEP_TIMINGS=($(date +%s)) -### Installs Google Cloud Ops Agent that is now required for Datapoc 2.2.X ### +## Installs Google Cloud Ops Agent that is now required for Datapoc 2.2.X ### # See https://github.com/GoogleCloudDataproc/initialization-actions/tree/master/opsagent # Installs the Google Cloud Ops Agent on each node in the cluster. # It also provides an override to the built-in logging config to set empty @@ -146,19 +146,19 @@ STEP_TIMINGS=($(date +%s)) # If you need to collect syslogs, you can use the other script in this directory, # opsagent.sh which uses the built-in configuration of Ops Agent. # See https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/configuration#default. - -# Detect dataproc image version from its various names -#if (! test -v DATAPROC_IMAGE_VERSION) && test -v DATAPROC_VERSION; then -# DATAPROC_IMAGE_VERSION="${DATAPROC_VERSION}" -#fi # -#if [[ $(echo "${DATAPROC_IMAGE_VERSION} < 2.2" | bc -l) == 1 ]]; then -# echo "This Dataproc cluster node runs image version ${DATAPROC_IMAGE_VERSION} with pre-installed legacy monitoring agent. Skipping Ops Agent installation." -# exit 0 -#fi -# -#curl -sSO https://dl.google.com/cloudagents/add-google-cloud-ops-agent-repo.sh -#bash add-google-cloud-ops-agent-repo.sh --also-install +# Detect dataproc image version from its various names +if (! test -v DATAPROC_IMAGE_VERSION) && test -v DATAPROC_VERSION; then + DATAPROC_IMAGE_VERSION="${DATAPROC_VERSION}" +fi + +if [[ $(echo "${DATAPROC_IMAGE_VERSION} < 2.2" | bc -l) == 1 ]]; then + echo "This Dataproc cluster node runs image version ${DATAPROC_IMAGE_VERSION} with pre-installed legacy monitoring agent. 
Skipping Ops Agent installation." + exit 0 +fi + +curl -sSO https://dl.google.com/cloudagents/add-google-cloud-ops-agent-repo.sh +bash add-google-cloud-ops-agent-repo.sh --also-install # temp workaround for https://github.com/docker/compose/issues/5930 export CLOUDSDK_PYTHON=python3 @@ -236,305 +236,305 @@ if [[ "${ROLE}" == 'Master' ]]; then INIT_BUCKET_NAME=$(initBucketName) STEP_TIMINGS+=($(date +%s)) -# -# log 'Copying secrets from GCS...' -# -# mkdir /work -# mkdir /certs -# chmod a+rwx /work -# -# # Add the certificates from the bucket to the VM. They are used by the docker-compose file -# gsutil cp ${SERVER_CRT} /certs -# gsutil cp ${SERVER_KEY} /certs -# gsutil cp ${ROOT_CA} /certs -# gsutil cp gs://${INIT_BUCKET_NAME}/* ${DOCKER_COMPOSE_FILES_DIRECTORY} -# -# -# # GCP connector is used by dataproc to connect with the staging bucket to read the logs -# touch /hadoop_gcs_connector_metadata_cache -# touch auth_openidc.conf -# -# -# # Add ops agent configuration for welder, jupyter, user startup and shutdown scripts -# cat <> /etc/google-cloud-ops-agent/config.yaml -# logging: -# receivers: -# welder: -# type: files -# include_paths: [/work/welder.log] -# jupyter: -# type: files -# include_paths: [/work/jupyter.log] -# daemon: -# type: files -# include_paths: [/var/log/daemon.log] -# service: -# pipelines: -# default_pipeline: -# receivers: [welder, jupyter, daemon] -#EOF -# systemctl restart google-cloud-ops-agent -# -# # Install env var config -# if [ ! -z ${CUSTOM_ENV_VARS_CONFIG_URI} ] ; then -# log 'Copy custom env vars config...' -# gsutil cp ${CUSTOM_ENV_VARS_CONFIG_URI} /var -# fi -# -# -# # If any image is hosted in a GAR registry (detected by regex) then -# # authorize docker to interact with gcr.io. -# # NOTE: GCR images are now hosted on GAR, but the file paths haven't changed, they automatically redirect. 
-# if grep -qF "gcr.io" <<< "${JUPYTER_DOCKER_IMAGE}${RSTUDIO_DOCKER_IMAGE}${PROXY_DOCKER_IMAGE}${WELDER_DOCKER_IMAGE}" ; then -# log 'Authorizing GCR/GAR...' -# gcloud auth configure-docker -# fi -# -# STEP_TIMINGS+=($(date +%s)) -# -# log 'Starting up the Jupyter docker...' -# -# # Run docker-compose for each specified compose file. -# # Note the `docker-compose pull` is retried to avoid intermittent network errors, but -# # `docker-compose up` is not retried. -# COMPOSE_FILES=(-f /etc/`basename ${PROXY_DOCKER_COMPOSE}`) -# -# cat /etc/`basename ${PROXY_DOCKER_COMPOSE}` -# -# if [ ! -z ${WELDER_DOCKER_IMAGE} ] && [ "${WELDER_ENABLED}" == "true" ] ; then -# COMPOSE_FILES+=(-f /etc/`basename ${WELDER_DOCKER_COMPOSE}`) -# cat /etc/`basename ${WELDER_DOCKER_COMPOSE}` -# fi -# -# if [ ! -z ${JUPYTER_DOCKER_IMAGE} ] ; then -# TOOL_SERVER_NAME=${JUPYTER_SERVER_NAME} -# COMPOSE_FILES+=(-f /etc/`basename ${JUPYTER_DOCKER_COMPOSE}`) -# cat /etc/`basename ${JUPYTER_DOCKER_COMPOSE}` -# fi -# -# if [ ! -z ${RSTUDIO_DOCKER_IMAGE} ] ; then -# TOOL_SERVER_NAME=${RSTUDIO_SERVER_NAME} -# COMPOSE_FILES+=(-f /etc/`basename ${RSTUDIO_DOCKER_COMPOSE}`) -# cat /etc/`basename ${RSTUDIO_DOCKER_COMPOSE}` -# fi -# -# retry 5 docker-compose "${COMPOSE_FILES[@]}" config -# -# # restart docker -# systemctl restart docker -# -# retry 5 docker-compose "${COMPOSE_FILES[@]}" pull -# retry 5 docker-compose "${COMPOSE_FILES[@]}" up -d -# -# # Start up crypto detector, if enabled. -# # This should be started after other containers. -# # Use `docker run` instead of docker-compose so we can link it to the Jupyter/RStudio container's network. -# # See https://github.com/broadinstitute/terra-cryptomining-security-alerts/tree/master/v2 -# if [ ! 
-z "$CRYPTO_DETECTOR_DOCKER_IMAGE" ] ; then -# docker run --name=${CRYPTO_DETECTOR_SERVER_NAME} --rm -d \ -# --net=container:${TOOL_SERVER_NAME} ${CRYPTO_DETECTOR_DOCKER_IMAGE} -# fi -# -# STEP_TIMINGS+=($(date +%s)) -# -# # Jupyter-specific setup, only do if Jupyter is installed -# if [ ! -z ${JUPYTER_DOCKER_IMAGE} ] ; then -# log 'Installing Jupydocker kernelspecs...' -# -# # Install notebook.json -# if [ ! -z ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} ] ; then -# log 'Copy Jupyter frontend notebook config...' -# gsutil cp ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} /etc -# JUPYTER_NOTEBOOK_FRONTEND_CONFIG=`basename ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI}` -# retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "mkdir -p $JUPYTER_HOME/nbconfig" -# docker cp /etc/${JUPYTER_NOTEBOOK_FRONTEND_CONFIG} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/nbconfig/ -# fi -# -# STEP_TIMINGS+=($(date +%s)) -# -# # Install NbExtensions. These are user-specified Jupyter extensions. -# # For instance Terra UI is passing -# # { -# # "nbExtensions": { -# # "saturn-iframe-extension": "https://bvdp-saturn-dev.appspot.com/jupyter-iframe-extension.js" -# # }, -# # "labExtensions": {}, -# # "serverExtensions": {}, -# # "combinedExtensions": {} -# # } -# if [ ! -z "${JUPYTER_NB_EXTENSIONS}" ] ; then -# for ext in ${JUPYTER_NB_EXTENSIONS} -# do -# log 'Installing Jupyter NB extension [$ext]...' 
-# if [[ $ext == 'gs://'* ]]; then -# gsutil cp $ext /etc -# JUPYTER_EXTENSION_ARCHIVE=`basename $ext` -# docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} -# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} -# elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then -# JUPYTER_EXTENSION_FILE=`basename $ext` -# curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} -# docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} -# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} -# else -# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_notebook_extension.sh $ext -# fi -# done -# fi -# -# STEP_TIMINGS+=($(date +%s)) -# -# # Install serverExtensions if provided by the user -# if [ ! -z "${JUPYTER_SERVER_EXTENSIONS}" ] ; then -# for ext in ${JUPYTER_SERVER_EXTENSIONS} -# do -# log 'Installing Jupyter server extension [$ext]...' -# if [[ $ext == 'gs://'* ]]; then -# gsutil cp $ext /etc -# JUPYTER_EXTENSION_ARCHIVE=`basename $ext` -# docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} -# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} -# else -# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_server_extension.sh $ext -# fi -# done -# fi -# -# STEP_TIMINGS+=($(date +%s)) -# -# # Install combined extensions if provided by the user -# if [ ! 
-z "${JUPYTER_COMBINED_EXTENSIONS}" ] ; then -# for ext in ${JUPYTER_COMBINED_EXTENSIONS} -# do -# log 'Installing Jupyter combined extension [$ext]...' -# log $ext -# if [[ $ext == 'gs://'* ]]; then -# gsutil cp $ext /etc -# JUPYTER_EXTENSION_ARCHIVE=`basename $ext` -# docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} -# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE} -# else -# retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_combined_extension.sh $ext -# fi -# done -# fi -# -# STEP_TIMINGS+=($(date +%s)) -# -# # If a user script was specified, copy it into the docker container and execute it. -# if [ ! -z "$USER_SCRIPT_URI" ] ; then -# apply_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME -# fi -# -# # done user script -# STEP_TIMINGS+=($(date +%s)) -# -# # If a start user script was specified, copy it into the docker container for consumption during startups. -# if [ ! -z "$START_USER_SCRIPT_URI" ] ; then -# apply_start_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME -# fi -# -# # done start user script -# STEP_TIMINGS+=($(date +%s)) -# -# # Install lab extensions if provided by the user -# # Note: lab extensions need to installed as jupyter user, not root -# if [ ! -z "${JUPYTER_LAB_EXTENSIONS}" ] ; then -# for ext in ${JUPYTER_LAB_EXTENSIONS} -# do -# log 'Installing JupyterLab extension [$ext]...' 
-# pwd -# if [[ $ext == 'gs://'* ]]; then -# gsutil cp -r $ext /etc -# JUPYTER_EXTENSION_ARCHIVE=`basename $ext` -# docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} -# retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} -# elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then -# JUPYTER_EXTENSION_FILE=`basename $ext` -# curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} -# docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} -# retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} -# else -# retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh $ext -# fi -# done -# fi -# -# STEP_TIMINGS+=($(date +%s)) -# -# # See IA-1901: Jupyter UI stalls indefinitely on initial R kernel connection after cluster create/resume -# # The intent of this is to "warm up" R at VM creation time to hopefully prevent issues when the Jupyter -# # kernel tries to connect to it. -# docker exec $JUPYTER_SERVER_NAME /bin/bash -c "R -e '1+1'" || true -# -# # jupyter_delocalize.py now assumes welder's url is `http://welder:8080`, but on dataproc, we're still using host network -# # A better to do this might be to take welder host as an argument to the script -# docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' /etc/jupyter/custom/jupyter_delocalize.py" -# -# # In new jupyter images, we should update jupyter_notebook_config.py in terra-docker. 
-# # This is to make it so that older images will still work after we change notebooks location to home dir -# docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py -# -# # Copy gitignore into jupyter container -# docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global" -# -# # Install nbstripout and set gitignore in Git Config -# docker exec $JUPYTER_SERVER_NAME /bin/bash -c "pip install nbstripout \ -# && python -m nbstripout --install --global \ -# && git config --global core.excludesfile $JUPYTER_USER_HOME/gitignore_global" -# -# # Install the custom jupyter extensions needed to lock notebooks into edit or safe modes (required by AOU) -# docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_HOME/scripts/extension/install_jupyter_contrib_nbextensions.sh \ -# && mkdir -p $JUPYTER_USER_HOME/.jupyter/custom/ \ -# && cp $JUPYTER_HOME/custom/google_sign_in.js $JUPYTER_USER_HOME/.jupyter/custom/ \ -# && ls -la $JUPYTER_HOME/custom/extension_entry_jupyter.js \ -# && cp $JUPYTER_HOME/custom/extension_entry_jupyter.js $JUPYTER_USER_HOME/.jupyter/custom/custom.js \ -# && cp $JUPYTER_HOME/custom/safe-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ -# && cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ -# && mkdir -p $JUPYTER_HOME/nbconfig" -# -# log 'Starting Jupyter Notebook...' -# retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_SCRIPTS}/run-jupyter.sh ${NOTEBOOKS_DIR}" -# STEP_TIMINGS+=($(date +%s)) -# fi -# -# # RStudio specific setup; only do if RStudio is installed -# if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then -# EXIT_CODE=0 -# retry 3 docker exec ${RSTUDIO_SERVER_NAME} ${RSTUDIO_SCRIPTS}/set_up_package_dir.sh || EXIT_CODE=$? 
-# if [ $EXIT_CODE -ne 0 ]; then -# echo "RStudio user package installation directory creation failed, creating /packages directory" -# docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c "mkdir -p ${RSTUDIO_USER_HOME}/packages && chmod a+rwx ${RSTUDIO_USER_HOME}/packages" -# fi -# -# # Add the EVs specified in rstudio-docker-compose.yaml to Renviron.site -# retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'echo "GOOGLE_PROJECT=$GOOGLE_PROJECT -#CLUSTER_NAME=$CLUSTER_NAME -#RUNTIME_NAME=$RUNTIME_NAME -#OWNER_EMAIL=$OWNER_EMAIL -#SHOULD_BACKGROUND_SYNC=$SHOULD_BACKGROUND_SYNC" >> /usr/local/lib/R/etc/Renviron.site' -# -# # Add custom_env_vars.env to Renviron.site -# CUSTOM_ENV_VARS_FILE=/var/custom_env_vars.env -# if [ -f "$CUSTOM_ENV_VARS_FILE" ]; then -# retry 3 docker cp ${CUSTOM_ENV_VARS_FILE} ${RSTUDIO_SERVER_NAME}:/usr/local/lib/R/var/custom_env_vars.env -# retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'cat /usr/local/lib/R/var/custom_env_vars.env >> /usr/local/lib/R/etc/Renviron.site' -# fi -# -# # If a user script was specified, copy it into the docker container and execute it. -# if [ ! -z "$USER_SCRIPT_URI" ] ; then -# apply_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS -# fi -# -# # If a start user script was specified, copy it into the docker container for consumption during startups. -# if [ ! -z "$START_USER_SCRIPT_URI" ] ; then -# apply_start_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS -# fi -# -# # Start RStudio server -# retry 3 docker exec -d ${RSTUDIO_SERVER_NAME} /init -# fi + log 'Copying secrets from GCS...' + + mkdir /work + mkdir /certs + chmod a+rwx /work + + # Add the certificates from the bucket to the VM. 
They are used by the docker-compose file + gsutil cp ${SERVER_CRT} /certs + gsutil cp ${SERVER_KEY} /certs + gsutil cp ${ROOT_CA} /certs + gsutil cp gs://${INIT_BUCKET_NAME}/* ${DOCKER_COMPOSE_FILES_DIRECTORY} + + + # GCP connector is used by dataproc to connect with the staging bucket to read the logs + touch /hadoop_gcs_connector_metadata_cache + touch auth_openidc.conf + + + # Add ops agent configuration for welder, jupyter, user startup and shutdown scripts + cat <<EOF >> /etc/google-cloud-ops-agent/config.yaml + logging: + receivers: + welder: + type: files + include_paths: [/work/welder.log] + jupyter: + type: files + include_paths: [/work/jupyter.log] + daemon: + type: files + include_paths: [/var/log/daemon.log] + service: + pipelines: + default_pipeline: + receivers: [welder, jupyter, daemon] +EOF + systemctl restart google-cloud-ops-agent + + # Install env var config + if [ ! -z ${CUSTOM_ENV_VARS_CONFIG_URI} ] ; then + log 'Copy custom env vars config...' + gsutil cp ${CUSTOM_ENV_VARS_CONFIG_URI} /var + fi + + + # If any image is hosted in a GAR registry (detected by regex) then + # authorize docker to interact with gcr.io. + # NOTE: GCR images are now hosted on GAR, but the file paths haven't changed, they automatically redirect. + if grep -qF "gcr.io" <<< "${JUPYTER_DOCKER_IMAGE}${RSTUDIO_DOCKER_IMAGE}${PROXY_DOCKER_IMAGE}${WELDER_DOCKER_IMAGE}" ; then + log 'Authorizing GCR/GAR...' + gcloud auth configure-docker + fi + + STEP_TIMINGS+=($(date +%s)) + + log 'Starting up the Jupyter docker...' + + # Run docker-compose for each specified compose file. + # Note the `docker-compose pull` is retried to avoid intermittent network errors, but + # `docker-compose up` is not retried. + COMPOSE_FILES=(-f /etc/`basename ${PROXY_DOCKER_COMPOSE}`) + + cat /etc/`basename ${PROXY_DOCKER_COMPOSE}` + + if [ !
-z ${WELDER_DOCKER_IMAGE} ] && [ "${WELDER_ENABLED}" == "true" ] ; then + COMPOSE_FILES+=(-f /etc/`basename ${WELDER_DOCKER_COMPOSE}`) + cat /etc/`basename ${WELDER_DOCKER_COMPOSE}` + fi + + if [ ! -z ${JUPYTER_DOCKER_IMAGE} ] ; then + TOOL_SERVER_NAME=${JUPYTER_SERVER_NAME} + COMPOSE_FILES+=(-f /etc/`basename ${JUPYTER_DOCKER_COMPOSE}`) + cat /etc/`basename ${JUPYTER_DOCKER_COMPOSE}` + fi + + if [ ! -z ${RSTUDIO_DOCKER_IMAGE} ] ; then + TOOL_SERVER_NAME=${RSTUDIO_SERVER_NAME} + COMPOSE_FILES+=(-f /etc/`basename ${RSTUDIO_DOCKER_COMPOSE}`) + cat /etc/`basename ${RSTUDIO_DOCKER_COMPOSE}` + fi + + retry 5 docker-compose "${COMPOSE_FILES[@]}" config + + # restart docker + systemctl restart docker + + retry 5 docker-compose "${COMPOSE_FILES[@]}" pull + retry 5 docker-compose "${COMPOSE_FILES[@]}" up -d + + # Start up crypto detector, if enabled. + # This should be started after other containers. + # Use `docker run` instead of docker-compose so we can link it to the Jupyter/RStudio container's network. + # See https://github.com/broadinstitute/terra-cryptomining-security-alerts/tree/master/v2 + if [ ! -z "$CRYPTO_DETECTOR_DOCKER_IMAGE" ] ; then + docker run --name=${CRYPTO_DETECTOR_SERVER_NAME} --rm -d \ + --net=container:${TOOL_SERVER_NAME} ${CRYPTO_DETECTOR_DOCKER_IMAGE} + fi + + STEP_TIMINGS+=($(date +%s)) + + # Jupyter-specific setup, only do if Jupyter is installed + if [ ! -z ${JUPYTER_DOCKER_IMAGE} ] ; then + log 'Installing Jupydocker kernelspecs...' + + # Install notebook.json + if [ ! -z ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} ] ; then + log 'Copy Jupyter frontend notebook config...' 
+ gsutil cp ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} /etc + JUPYTER_NOTEBOOK_FRONTEND_CONFIG=`basename ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI}` + retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "mkdir -p $JUPYTER_HOME/nbconfig" + docker cp /etc/${JUPYTER_NOTEBOOK_FRONTEND_CONFIG} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/nbconfig/ + fi + + STEP_TIMINGS+=($(date +%s)) + + # Install NbExtensions. These are user-specified Jupyter extensions. + # For instance Terra UI is passing + # { + # "nbExtensions": { + # "saturn-iframe-extension": "https://bvdp-saturn-dev.appspot.com/jupyter-iframe-extension.js" + # }, + # "labExtensions": {}, + # "serverExtensions": {}, + # "combinedExtensions": {} + # } + if [ ! -z "${JUPYTER_NB_EXTENSIONS}" ] ; then + for ext in ${JUPYTER_NB_EXTENSIONS} + do + log 'Installing Jupyter NB extension [$ext]...' + if [[ $ext == 'gs://'* ]]; then + gsutil cp $ext /etc + JUPYTER_EXTENSION_ARCHIVE=`basename $ext` + docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then + JUPYTER_EXTENSION_FILE=`basename $ext` + curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} + docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} + else + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_notebook_extension.sh $ext + fi + done + fi + + STEP_TIMINGS+=($(date +%s)) + + # Install serverExtensions if provided by the user + if [ ! 
-z "${JUPYTER_SERVER_EXTENSIONS}" ] ; then + for ext in ${JUPYTER_SERVER_EXTENSIONS} + do + log 'Installing Jupyter server extension [$ext]...' + if [[ $ext == 'gs://'* ]]; then + gsutil cp $ext /etc + JUPYTER_EXTENSION_ARCHIVE=`basename $ext` + docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + else + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_server_extension.sh $ext + fi + done + fi + + STEP_TIMINGS+=($(date +%s)) + + # Install combined extensions if provided by the user + if [ ! -z "${JUPYTER_COMBINED_EXTENSIONS}" ] ; then + for ext in ${JUPYTER_COMBINED_EXTENSIONS} + do + log 'Installing Jupyter combined extension [$ext]...' + log $ext + if [[ $ext == 'gs://'* ]]; then + gsutil cp $ext /etc + JUPYTER_EXTENSION_ARCHIVE=`basename $ext` + docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE} + else + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_combined_extension.sh $ext + fi + done + fi + + STEP_TIMINGS+=($(date +%s)) + + # If a user script was specified, copy it into the docker container and execute it. + if [ ! -z "$USER_SCRIPT_URI" ] ; then + apply_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME + fi + + # done user script + STEP_TIMINGS+=($(date +%s)) + + # If a start user script was specified, copy it into the docker container for consumption during startups. + if [ ! 
-z "$START_USER_SCRIPT_URI" ] ; then + apply_start_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME + fi + + # done start user script + STEP_TIMINGS+=($(date +%s)) + + # Install lab extensions if provided by the user + # Note: lab extensions need to be installed as jupyter user, not root + if [ ! -z "${JUPYTER_LAB_EXTENSIONS}" ] ; then + for ext in ${JUPYTER_LAB_EXTENSIONS} + do + log 'Installing JupyterLab extension [$ext]...' + pwd + if [[ $ext == 'gs://'* ]]; then + gsutil cp -r $ext /etc + JUPYTER_EXTENSION_ARCHIVE=`basename $ext` + docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then + JUPYTER_EXTENSION_FILE=`basename $ext` + curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} + docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} + retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} + else + retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh $ext + fi + done + fi + + STEP_TIMINGS+=($(date +%s)) + + # See IA-1901: Jupyter UI stalls indefinitely on initial R kernel connection after cluster create/resume + # The intent of this is to "warm up" R at VM creation time to hopefully prevent issues when the Jupyter + # kernel tries to connect to it.
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "R -e '1+1'" || true + + # jupyter_delocalize.py now assumes welder's url is `http://welder:8080`, but on dataproc, we're still using host network + # A better way to do this might be to take welder host as an argument to the script + docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' /etc/jupyter/custom/jupyter_delocalize.py" + + # In new jupyter images, we should update jupyter_notebook_config.py in terra-docker. + # This is to make it so that older images will still work after we change notebooks location to home dir + docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py + + # Copy gitignore into jupyter container + docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global" + + # Install nbstripout and set gitignore in Git Config + docker exec $JUPYTER_SERVER_NAME /bin/bash -c "pip install nbstripout \ + && python -m nbstripout --install --global \ + && git config --global core.excludesfile $JUPYTER_USER_HOME/gitignore_global" + + # Install the custom jupyter extensions needed to lock notebooks into edit or safe modes (required by AOU) + docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_HOME/scripts/extension/install_jupyter_contrib_nbextensions.sh \ + && mkdir -p $JUPYTER_USER_HOME/.jupyter/custom/ \ + && cp $JUPYTER_HOME/custom/google_sign_in.js $JUPYTER_USER_HOME/.jupyter/custom/ \ + && ls -la $JUPYTER_HOME/custom/extension_entry_jupyter.js \ + && cp $JUPYTER_HOME/custom/extension_entry_jupyter.js $JUPYTER_USER_HOME/.jupyter/custom/custom.js \ + && cp $JUPYTER_HOME/custom/safe-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ + && cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ + && mkdir -p
$JUPYTER_HOME/nbconfig" + + log 'Starting Jupyter Notebook...' + retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_SCRIPTS}/run-jupyter.sh ${NOTEBOOKS_DIR}" + + STEP_TIMINGS+=($(date +%s)) + fi + + # RStudio specific setup; only do if RStudio is installed + if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then + EXIT_CODE=0 + retry 3 docker exec ${RSTUDIO_SERVER_NAME} ${RSTUDIO_SCRIPTS}/set_up_package_dir.sh || EXIT_CODE=$? + if [ $EXIT_CODE -ne 0 ]; then + echo "RStudio user package installation directory creation failed, creating /packages directory" + docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c "mkdir -p ${RSTUDIO_USER_HOME}/packages && chmod a+rwx ${RSTUDIO_USER_HOME}/packages" + fi + + # Add the EVs specified in rstudio-docker-compose.yaml to Renviron.site + retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'echo "GOOGLE_PROJECT=$GOOGLE_PROJECT +CLUSTER_NAME=$CLUSTER_NAME +RUNTIME_NAME=$RUNTIME_NAME +OWNER_EMAIL=$OWNER_EMAIL +SHOULD_BACKGROUND_SYNC=$SHOULD_BACKGROUND_SYNC" >> /usr/local/lib/R/etc/Renviron.site' + + # Add custom_env_vars.env to Renviron.site + CUSTOM_ENV_VARS_FILE=/var/custom_env_vars.env + if [ -f "$CUSTOM_ENV_VARS_FILE" ]; then + retry 3 docker cp ${CUSTOM_ENV_VARS_FILE} ${RSTUDIO_SERVER_NAME}:/usr/local/lib/R/var/custom_env_vars.env + retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'cat /usr/local/lib/R/var/custom_env_vars.env >> /usr/local/lib/R/etc/Renviron.site' + fi + + # If a user script was specified, copy it into the docker container and execute it. + if [ ! -z "$USER_SCRIPT_URI" ] ; then + apply_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS + fi + + # If a start user script was specified, copy it into the docker container for consumption during startups. + if [ ! 
-z "$START_USER_SCRIPT_URI" ] ; then + apply_start_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS + fi + + # Start RStudio server + retry 3 docker exec -d ${RSTUDIO_SERVER_NAME} /init + fi # Remove any unneeded cached images to save disk space. # Do this asynchronously so it doesn't hold up cluster creation diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala index 5ed6715621e..ff509d53d1d 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala @@ -204,7 +204,8 @@ class DataprocInterpreter[F[_]: Parallel]( } else { List(config.vpcConfig.networkTag.value) } - + // Dataproc 2.2.X changed the default behavior to not assign an external ip address by default + // https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/network#create-a-dataproc-cluster-with-internal-IP-addresses-only gceClusterConfig = { val bldr = GceClusterConfig .newBuilder() @@ -212,6 +213,7 @@ class DataprocInterpreter[F[_]: Parallel]( .setSubnetworkUri(subnetwork.value) .setServiceAccount(params.serviceAccountInfo.value) .addAllServiceAccountScopes(params.scopes.asJava) + .setInternalIpOnly(false) bldr.build() } From 64b6d9b50c8acbeb597d8e4447ad748937476d9d Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Fri, 2 May 2025 10:23:20 -0400 Subject: [PATCH 20/35] update with cached hail 234 image --- http/src/main/resources/reference.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/http/src/main/resources/reference.conf b/http/src/main/resources/reference.conf index b672f9c05e2..e9619b030b8 100644 --- a/http/src/main/resources/reference.conf +++ b/http/src/main/resources/reference.conf @@ -70,7 +70,7 @@ dataproc { } # Cached dataproc image used by Terra - 
customDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-2-52-debian12-2025-04-24-14-59-45" + customDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-2-52-debian12-2025-05-01-21-02-02" # The ratio of memory allocated to spark. 0.8 = 80%. # Hail/Spark users generally allocate 80% of the ram to the JVM. From a9983222c765d1270e0dd6e7d9d2c926c305f76a Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Fri, 2 May 2025 14:39:17 -0400 Subject: [PATCH 21/35] point to new gce image that has the latest aou as well --- http/src/main/resources/reference.conf | 2 +- .../dsde/workbench/leonardo/util/DataprocInterpreter.scala | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/http/src/main/resources/reference.conf b/http/src/main/resources/reference.conf index e9619b030b8..639778e3098 100644 --- a/http/src/main/resources/reference.conf +++ b/http/src/main/resources/reference.conf @@ -111,7 +111,7 @@ dataproc { } gce { - customGceImage = "projects/broad-dsp-gcr-public/global/images/leo-gce-image-2025-04-28-18-54-55" + customGceImage = "projects/broad-dsp-gcr-public/global/images/leo-gce-image-2025-05-02-18-01-22" userDiskDeviceName = "user-disk" defaultScopes = [ "https://www.googleapis.com/auth/userinfo.email", diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala index ff509d53d1d..55b6ec3e344 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala @@ -204,7 +204,8 @@ class DataprocInterpreter[F[_]: Parallel]( } else { List(config.vpcConfig.networkTag.value) } - // Dataproc 2.2.X changed the default behavior to not assign an external ip address by default + // Dataproc 2.2.X changed the default behavior to not 
assign an external ip address anymore, + // but a combination of Internal IP only and Private Google Access instead, see: // https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/network#create-a-dataproc-cluster-with-internal-IP-addresses-only gceClusterConfig = { val bldr = GceClusterConfig From 591321cd516b80997ea36ebc540e00d8a71059d1 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Fri, 2 May 2025 14:43:36 -0400 Subject: [PATCH 22/35] cleanup --- .../prepare-custom-leonardo-jupyter-dataproc-image.sh | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh index 20e4ff32158..96fcf236fb3 100755 --- a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh +++ b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh @@ -144,15 +144,11 @@ retry 5 add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/d retry 5 apt-get update dpkg --configure -a -# This line fails consistently, but it does not fail in a fatal way so we add `|| true` to prevent the script from halting execution -# The message that is non-fatal is `Sub-process /usr/bin/dpkg returned an error code (1).` -# NOTE: If it fails with another legitimate error, this `|| true` could mask it. It was used as a last resort after a lot of attempts to fix. -# apt-get install -y -q docker-ce || true -log 'Installing Docker Compose...' # start docker systemctl start docker +log 'Installing Docker Compose...' 
# Install docker-compose # https://docs.docker.com/compose/install/#install-compose docker_compose_version_number="v2.28.1" From 41f7e89aaafa349baaf8c6514becc37dfea7ace0 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Mon, 5 May 2025 09:57:09 -0400 Subject: [PATCH 23/35] update default gatk image version used --- http/src/main/resources/reference.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/http/src/main/resources/reference.conf b/http/src/main/resources/reference.conf index 639778e3098..77bd716e0aa 100644 --- a/http/src/main/resources/reference.conf +++ b/http/src/main/resources/reference.conf @@ -896,7 +896,7 @@ image { welderGcrUri = "us.gcr.io/broad-dsp-gcr-public/welder-server" welderDockerHubUri = "broadinstitute/welder-server" welderHash = "0c1d0eb" - jupyterImage = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-gatk:2.3.6" + jupyterImage = "us.gcr.io/broad-dsp-gcr-public/terra-jupyter-gatk:2.3.9" proxyImage = "broadinstitute/openidc-proxy:2.3.1_2" # Note: If you update this, please also update prepare_gce_image.sh and # prepare-custom-leonardo-jupyter-dataproc-image.sh scripts. 
From 4b01b25cf42c586c8b729d9c9e37af4c8f6b9826 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Mon, 5 May 2025 13:06:42 -0400 Subject: [PATCH 24/35] cache both aou 2.2.13 and 2.2.16 to help them transition smoothly --- .../prepare-custom-leonardo-jupyter-dataproc-image.sh | 4 ++-- jenkins/gce-custom-images/prepare_gce_image.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh index 96fcf236fb3..cba8c5757c0 100755 --- a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh +++ b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh @@ -28,8 +28,8 @@ openidc_proxy="broadinstitute/openidc-proxy:2.3.1_2" anvil_rstudio_bioconductor="us.gcr.io/broad-dsp-gcr-public/anvil-rstudio-bioconductor:3.20.1" # Note that this is the version used currently by AOU in production, the one above can be staged for testing -# AN-503: Note that AOU 2.2.15 is using a hail version that still requires dataproc 2.1.x, which won't be provided by leonardo anymore -terra_jupyter_aou_old="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.15" +# AN-503: Note that AOU 2.2.13 is using a hail version that still requires dataproc 2.1.x +terra_jupyter_aou_old="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.13" # If you change this you must also change Leo reference.conf! 
cryptomining_detector="us.gcr.io/broad-dsp-gcr-public/cryptomining-detector:0.0.2" diff --git a/jenkins/gce-custom-images/prepare_gce_image.sh b/jenkins/gce-custom-images/prepare_gce_image.sh index 9db400dc63d..a7dab5e6915 100755 --- a/jenkins/gce-custom-images/prepare_gce_image.sh +++ b/jenkins/gce-custom-images/prepare_gce_image.sh @@ -26,8 +26,8 @@ openidc_proxy="broadinstitute/openidc-proxy:2.3.1_2" anvil_rstudio_bioconductor="us.gcr.io/broad-dsp-gcr-public/anvil-rstudio-bioconductor:3.20.1" # Note that this is the version used currently by AOU in production, the one above can be staged for testing -# AN-503: Note that AOU 2.2.15 is using a hail version that still requires dataproc 2.1.x, which won't be provided by leonardo anymore -terra_jupyter_aou_old="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.15" +# AN-503: Note that AOU 2.2.13 is using a hail version that still requires dataproc 2.1.x +terra_jupyter_aou_old="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.13" cos_gpu_installer="gcr.io/cos-cloud/cos-gpu-installer:v2.1.9" google_cloud_toolbox="us.gcr.io/cos-cloud/toolbox:v20230714" From 67f9f487d541672725106b8e7b74baaf9bb5818d Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Mon, 5 May 2025 13:40:15 -0400 Subject: [PATCH 25/35] use dataproc 2.1 for aou and 2.2 for terra --- http/src/main/resources/reference.conf | 4 +++- .../dsde/workbench/leonardo/config/Config.scala | 1 + .../dsde/workbench/leonardo/config/DataprocConfig.scala | 1 + .../dsde/workbench/leonardo/util/DataprocInterpreter.scala | 5 ++++- .../dsde/workbench/leonardo/CommonTestData.scala | 2 ++ .../prepare-custom-leonardo-jupyter-dataproc-image.sh | 4 ---- 6 files changed, 11 insertions(+), 6 deletions(-) diff --git a/http/src/main/resources/reference.conf b/http/src/main/resources/reference.conf index 77bd716e0aa..2f5d4f5f599 100644 --- a/http/src/main/resources/reference.conf +++ b/http/src/main/resources/reference.conf @@ -69,8 +69,10 @@ dataproc { region = "us-central1" } - # 
Cached dataproc image used by Terra + # Cached dataproc 2.2.x image used by Terra customDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-2-52-debian12-2025-05-01-21-02-02" + # Cached dataproc 2.1.x image used by AOU + legacyAouCustomDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-0-51-debian10-2023-12-05-21-47-12" # The ratio of memory allocated to spark. 0.8 = 80%. # Hail/Spark users generally allocate 80% of the ram to the JVM. diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala index f9f62aee65e..25db97e0248 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala @@ -124,6 +124,7 @@ object Config { DataprocConfig( config.getStringList("defaultScopes").asScala.toSet, config.as[DataprocCustomImage]("customDataprocImage"), + config.as[DataprocCustomImage]("legacyAouCustomDataprocImage"), config.getAs[Double]("sparkMemoryConfigRatio"), config.getAs[Double]("minimumRuntimeMemoryInGb"), config.as[RuntimeConfig.DataprocConfig]("runtimeDefaults"), diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/DataprocConfig.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/DataprocConfig.scala index 1b7d8c20d3d..a4bea7bc594 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/DataprocConfig.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/DataprocConfig.scala @@ -7,6 +7,7 @@ import org.broadinstitute.dsde.workbench.leonardo.CustomImage.DataprocCustomImag final case class DataprocConfig( defaultScopes: Set[String], customDataprocImage: DataprocCustomImage, + legacyAouCustomDataprocImage: DataprocCustomImage, sparkMemoryConfigRatio: 
Option[Double], minimumRuntimeMemoryInGb: Option[Double], runtimeConfigDefaults: RuntimeConfig.DataprocConfig, diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala index 55b6ec3e344..bfbf88e049d 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala @@ -193,7 +193,10 @@ class DataprocInterpreter[F[_]: Parallel]( // We need to maintain the old version of the dataproc image to uncouple the terra from the aou release imageUrls = params.runtimeImages.map(_.imageUrl) - dataprocImage = config.dataprocConfig.customDataprocImage + dataprocImage = + if (imageUrls.contains("us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.13")) + config.dataprocConfig.legacyAouCustomDataprocImage + else config.dataprocConfig.customDataprocImage // If the cluster is configured with worker private access, then specify the // `leonardo-private` network tag. 
This tag will be removed from the master node diff --git a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/CommonTestData.scala b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/CommonTestData.scala index 094f8300a7e..82b6e97c038 100644 --- a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/CommonTestData.scala +++ b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/CommonTestData.scala @@ -236,6 +236,8 @@ object CommonTestData { RuntimeImage(Proxy, imageConfig.proxyImage.imageUrl, None, Instant.now.truncatedTo(ChronoUnit.MICROS)) val customDataprocImage = RuntimeImage(BootSource, "custom_dataproc", None, Instant.now.truncatedTo(ChronoUnit.MICROS)) + val legacyAouCustomDataprocImage = + RuntimeImage(BootSource, "legacy_aou_custom_dataproc", None, Instant.now.truncatedTo(ChronoUnit.MICROS)) val cryptoDetectorImage = RuntimeImage(CryptoDetector, "crypto/crypto:0.0.1", None, Instant.now.truncatedTo(ChronoUnit.MICROS)) diff --git a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh index cba8c5757c0..226e2bba095 100755 --- a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh +++ b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh @@ -38,10 +38,6 @@ cryptomining_detector="us.gcr.io/broad-dsp-gcr-public/cryptomining-detector:0.0. 
# the entry must match the var name above, which must correspond to a valid docker URI docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou terra_jupyter_aou_old openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" -# NOTE - UNCOMMENT TO REGENERATE THE AOU LEGACY DATAPROC IMAGE -# You would also need to change the debian version, see https://github.com/DataBiosphere/leonardo/pull/3871 -#docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou_old openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" - # The version of python to install # Note: this should match the version of python in the terra-jupyter-hail image. python_version="3.10.9" From 9741e772e612121b588db7423c2b53abba81b788 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Mon, 5 May 2025 14:38:50 -0400 Subject: [PATCH 26/35] we also need to point aou to the legacy init script --- .../init-actions-aou-dataproc21.sh | 538 ++++++++++++++++++ http/src/main/resources/reference.conf | 11 +- .../config/ClusterResourcesConfig.scala | 1 + .../workbench/leonardo/config/Config.scala | 2 + .../leonardo/config/DataprocConfig.scala | 1 + .../leonardo/util/BucketHelper.scala | 1 + .../leonardo/util/DataprocInterpreter.scala | 20 +- .../workbench/leonardo/CommonTestData.scala | 1 + 8 files changed, 561 insertions(+), 14 deletions(-) create mode 100644 http/src/main/resources/init-resources/init-actions-aou-dataproc21.sh diff --git a/http/src/main/resources/init-resources/init-actions-aou-dataproc21.sh b/http/src/main/resources/init-resources/init-actions-aou-dataproc21.sh new file mode 100644 index 00000000000..2296d8d4110 --- /dev/null +++ b/http/src/main/resources/init-resources/init-actions-aou-dataproc21.sh @@ -0,0 +1,538 @@ +#!/usr/bin/env bash + +set -e -x + +# AN-503: Delete once AOU has 
switched to using Dataproc 2.2.X in prod + +# This is the very first script as we started on Dataproc +# +# This init script instantiates the tool (e.g. Jupyter) docker images on the Dataproc cluster master node. +# Adapted from https://github.com/GoogleCloudPlatform/dataproc-initialization-actions/blob/master/datalab/datalab.sh +# + +# +# Functions +# + +# Retry a command up to a specific number of times until it exits successfully, +# with exponential back off. +# +# $ retry 5 echo "Hello" +# Hello +# +# $ retry 5 false +# Retry 1/5 exited 1, retrying in 2 seconds... +# Retry 2/5 exited 1, retrying in 4 seconds... +# Retry 3/5 exited 1, retrying in 8 seconds... +# Retry 4/5 exited 1, retrying in 16 seconds... +# Retry 5/5 exited 1, no more retries left. +function retry { + local retries=$1 + shift + + for ((i = 1; i <= $retries; i++)); do + # run with an 'or' so set -e doesn't abort the bash script on errors + exit=0 + "$@" || exit=$? + if [ $exit -eq 0 ]; then + return 0 + fi + wait=$((2 ** $i)) + if [ $i -eq $retries ]; then + log "Retry $i/$retries exited $exit, no more retries left." + break + fi + log "Retry $i/$retries exited $exit, retrying in $wait seconds..." + sleep $wait + done + return 1 +} + +function log() { + echo "[$(date +'%Y-%m-%dT%H:%M:%S%z')]: $@" +} + +function betterAptGet() { + if ! { apt-get update 2>&1 || echo E: update failed; } | grep -q '^[WE]:'; then + return 0 + else + return 1 + fi +} + +function apply_user_script() { + local CONTAINER_NAME=$1 + local TARGET_DIR=$2 + + log "Running user script $USER_SCRIPT_URI in $CONTAINER_NAME container..." 
+ USER_SCRIPT=`basename ${USER_SCRIPT_URI}` + if [[ "$USER_SCRIPT_URI" == 'gs://'* ]]; then + gsutil cp ${USER_SCRIPT_URI} /etc + else + curl $USER_SCRIPT_URI -o /etc/${USER_SCRIPT} + fi + docker cp /etc/${USER_SCRIPT} ${CONTAINER_NAME}:${TARGET_DIR}/${USER_SCRIPT} + retry 3 docker exec -u root ${CONTAINER_NAME} chmod +x ${TARGET_DIR}/${USER_SCRIPT} + + # Execute the user script as privileged to allow for deeper customization of VM behavior, e.g. installing + # network egress throttling. As docker is not a security layer, it is assumed that a determined attacker + # can gain full access to the VM already, so using this flag is not a significant escalation. + EXIT_CODE=0 + docker exec --privileged -u root -e PIP_USER=false ${CONTAINER_NAME} ${TARGET_DIR}/${USER_SCRIPT} &> us_output.txt || EXIT_CODE=$? + + if [ $EXIT_CODE -ne 0 ]; then + log "User script failed with exit code $EXIT_CODE. Output is saved to $USER_SCRIPT_OUTPUT_URI." + retry 3 gsutil -h "x-goog-meta-passed":"false" cp us_output.txt ${USER_SCRIPT_OUTPUT_URI} + exit $EXIT_CODE + else + retry 3 gsutil -h "x-goog-meta-passed":"true" cp us_output.txt ${USER_SCRIPT_OUTPUT_URI} + fi +} + +function apply_start_user_script() { + local CONTAINER_NAME=$1 + local TARGET_DIR=$2 + + log "Running start user script $START_USER_SCRIPT_URI in $CONTAINER_NAME container..." + START_USER_SCRIPT=`basename ${START_USER_SCRIPT_URI}` + if [[ "$START_USER_SCRIPT_URI" == 'gs://'* ]]; then + gsutil cp ${START_USER_SCRIPT_URI} /etc + else + curl $START_USER_SCRIPT_URI -o /etc/${START_USER_SCRIPT} + fi + docker cp /etc/${START_USER_SCRIPT} ${CONTAINER_NAME}:${TARGET_DIR}/${START_USER_SCRIPT} + retry 3 docker exec -u root ${CONTAINER_NAME} chmod +x ${TARGET_DIR}/${START_USER_SCRIPT} + + # Keep in sync with startup.sh + EXIT_CODE=0 + docker exec --privileged -u root -e PIP_USER=false ${CONTAINER_NAME} ${TARGET_DIR}/${START_USER_SCRIPT} &> start_output.txt || EXIT_CODE=$? 
+ if [ $EXIT_CODE -ne 0 ]; then + echo "User start script failed with exit code ${EXIT_CODE}. Output is saved to ${START_USER_SCRIPT_OUTPUT_URI}" + retry 3 gsutil -h "x-goog-meta-passed":"false" cp start_output.txt ${START_USER_SCRIPT_OUTPUT_URI} + exit $EXIT_CODE + else + retry 3 gsutil -h "x-goog-meta-passed":"true" cp start_output.txt ${START_USER_SCRIPT_OUTPUT_URI} + fi +} + +# +# Main +# + +# +# Array for instrumentation +# UPDATE THIS IF YOU ADD MORE STEPS: +# currently the steps are: +# START init, +# .. after env setup +# .. after copying files from google and into docker +# .. after docker compose +# .. after welder start +# .. after hail and spark +# .. after nbextension install +# .. after server extension install +# .. after combined extension install +# .. after user script +# .. after lab extension install +# .. after jupyter notebook start +# END +STEP_TIMINGS=($(date +%s)) +# temp workaround for https://github.com/docker/compose/issues/5930 +export CLOUDSDK_PYTHON=python3 + +# This identifies whether we are running on the master node (running the jupyter container). There does not seem to be any customization of the worker nodes +ROLE=$(/usr/share/google/get_metadata_value attributes/dataproc-role) + +# Only initialize tool and proxy docker containers on the master +if [[ "${ROLE}" == 'Master' ]]; then + JUPYTER_HOME=/etc/jupyter + JUPYTER_SCRIPTS=${JUPYTER_HOME}/scripts + KERNELSPEC_HOME=/usr/local/share/jupyter/kernels + + # Set variables + # Values like $(..) are populated by Leo when a cluster is created. + # See https://github.com/DataBiosphere/leonardo/blob/e46acfcb409b11198b1f12533cefea3f6c7fdafb/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/RuntimeTemplateValues.scala#L192 + # Avoid exporting variables unless they are needed by external scripts or docker-compose files. 
+ export CLOUD_SERVICE='DATAPROC' + # Needs to be in sync with terra-docker container + export JUPYTER_USER_HOME=$(jupyterHomeDirectory) + export CLUSTER_NAME=$(clusterName) + export RUNTIME_NAME=$(clusterName) + export GOOGLE_PROJECT=$(googleProject) + export STAGING_BUCKET=$(stagingBucketName) + export OWNER_EMAIL=$(loginHint) + export PET_SA_EMAIL=$(petSaEmail) + export JUPYTER_SERVER_NAME=$(jupyterServerName) + export RSTUDIO_SERVER_NAME=$(rstudioServerName) + export PROXY_SERVER_NAME=$(proxyServerName) + export WELDER_SERVER_NAME=$(welderServerName) + export CRYPTO_DETECTOR_SERVER_NAME=$(cryptoDetectorServerName) + export JUPYTER_DOCKER_IMAGE=$(jupyterDockerImage) + export RSTUDIO_DOCKER_IMAGE=$(rstudioDockerImage) + export PROXY_DOCKER_IMAGE=$(proxyDockerImage) + export WELDER_DOCKER_IMAGE=$(welderDockerImage) + export CRYPTO_DETECTOR_DOCKER_IMAGE=$(cryptoDetectorDockerImage) + export WELDER_ENABLED=$(welderEnabled) + export NOTEBOOKS_DIR=$(notebooksDir) + export MEM_LIMIT=$(memLimit) + export SHM_SIZE=$(shmSize) + export WELDER_MEM_LIMIT=$(welderMemLimit) + export PROXY_SERVER_HOST_NAME=$(proxyServerHostName) + export CERT_DIRECTORY='/certs' + export WORK_DIRECTORY='/work' + export DOCKER_COMPOSE_FILES_DIRECTORY='/etc' + PROXY_SITE_CONF=$(proxySiteConf) + export HOST_PROXY_SITE_CONF_FILE_PATH=${DOCKER_COMPOSE_FILES_DIRECTORY}/`basename ${PROXY_SITE_CONF}` + if [ ! 
-z "$RSTUDIO_DOCKER_IMAGE" ] ; then + export SHOULD_BACKGROUND_SYNC="true" + else + export SHOULD_BACKGROUND_SYNC="false" + fi + + SERVER_CRT=$(proxyServerCrt) + SERVER_KEY=$(proxyServerKey) + ROOT_CA=$(rootCaPem) + JUPYTER_DOCKER_COMPOSE=$(jupyterDockerCompose) + RSTUDIO_DOCKER_COMPOSE=$(rstudioDockerCompose) + PROXY_DOCKER_COMPOSE=$(proxyDockerCompose) + WELDER_DOCKER_COMPOSE=$(welderDockerCompose) + PROXY_SITE_CONF=$(proxySiteConf) + JUPYTER_SERVER_EXTENSIONS=$(jupyterServerExtensions) + JUPYTER_NB_EXTENSIONS=$(jupyterNbExtensions) + JUPYTER_COMBINED_EXTENSIONS=$(jupyterCombinedExtensions) + JUPYTER_LAB_EXTENSIONS=$(jupyterLabExtensions) + USER_SCRIPT_URI=$(userScriptUri) + USER_SCRIPT_OUTPUT_URI=$(userScriptOutputUri) + START_USER_SCRIPT_URI=$(startUserScriptUri) + # Include a timestamp suffix to differentiate different startup logs across restarts. + START_USER_SCRIPT_OUTPUT_URI="$(startUserScriptOutputUri)" + JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI=$(jupyterNotebookFrontendConfigUri) + CUSTOM_ENV_VARS_CONFIG_URI=$(customEnvVarsConfigUri) + RSTUDIO_SCRIPTS=/etc/rstudio/scripts + RSTUDIO_USER_HOME=/home/rstudio + INIT_BUCKET_NAME=$(initBucketName) + + STEP_TIMINGS+=($(date +%s)) + + log 'Copying secrets from GCS...' + + mkdir /work + mkdir /certs + chmod a+rwx /work + + # Add the certificates from the bucket to the VM. 
They are used by the docker-compose file + gsutil cp ${SERVER_CRT} /certs + gsutil cp ${SERVER_KEY} /certs + gsutil cp ${ROOT_CA} /certs + gsutil cp gs://${INIT_BUCKET_NAME}/* ${DOCKER_COMPOSE_FILES_DIRECTORY} + + + # GCP connector is used by dataproc to connect with the staging bucket to read the logs + touch /hadoop_gcs_connector_metadata_cache + touch auth_openidc.conf + + + ## Note that the stack driver configuration is changing in later versions of Dataproc, see https://broadworkbench.atlassian.net/browse/IA-5023 + # Add stack driver configuration for welder + tee /etc/google-fluentd/config.d/welder.conf << END +<source> + @type tail + format json + path /work/welder.log + pos_file /var/tmp/fluentd.welder.pos + read_from_head true + tag welder +</source> +END + + # Add stack driver configuration for jupyter + tee /etc/google-fluentd/config.d/jupyter.conf << END +<source> + @type tail + format none + path /work/jupyter.log + pos_file /var/tmp/fluentd.jupyter.pos + read_from_head true + tag jupyter +</source> +END + + # Add stack driver configuration for user startup and shutdown scripts + tee /etc/google-fluentd/config.d/daemon.conf << END +<source> + @type tail + format none + path /var/log/daemon.log + pos_file /var/tmp/fluentd.google.user.daemon.pos + read_from_head true + tag daemon +</source> +END + + service google-fluentd reload + + # Install env var config + if [ ! -z ${CUSTOM_ENV_VARS_CONFIG_URI} ] ; then + log 'Copy custom env vars config...' + gsutil cp ${CUSTOM_ENV_VARS_CONFIG_URI} /var + fi + + + # If any image is hosted in a GAR registry (detected by regex) then + # authorize docker to interact with gcr.io. + # NOTE: GCR images are now hosted on GAR, but the file paths haven't changed, they automatically redirect. + if grep -qF "gcr.io" <<< "${JUPYTER_DOCKER_IMAGE}${RSTUDIO_DOCKER_IMAGE}${PROXY_DOCKER_IMAGE}${WELDER_DOCKER_IMAGE}" ; then + log 'Authorizing GCR/GAR...' + gcloud auth configure-docker + fi + + STEP_TIMINGS+=($(date +%s)) + + log 'Starting up the Jupyter docker...' 
+ + # Run docker-compose for each specified compose file. + # Note the `docker-compose pull` is retried to avoid intermittent network errors, but + # `docker-compose up` is not retried. + COMPOSE_FILES=(-f /etc/`basename ${PROXY_DOCKER_COMPOSE}`) + + cat /etc/`basename ${PROXY_DOCKER_COMPOSE}` + + if [ ! -z ${WELDER_DOCKER_IMAGE} ] && [ "${WELDER_ENABLED}" == "true" ] ; then + COMPOSE_FILES+=(-f /etc/`basename ${WELDER_DOCKER_COMPOSE}`) + cat /etc/`basename ${WELDER_DOCKER_COMPOSE}` + fi + + if [ ! -z ${JUPYTER_DOCKER_IMAGE} ] ; then + TOOL_SERVER_NAME=${JUPYTER_SERVER_NAME} + COMPOSE_FILES+=(-f /etc/`basename ${JUPYTER_DOCKER_COMPOSE}`) + cat /etc/`basename ${JUPYTER_DOCKER_COMPOSE}` + fi + + if [ ! -z ${RSTUDIO_DOCKER_IMAGE} ] ; then + TOOL_SERVER_NAME=${RSTUDIO_SERVER_NAME} + COMPOSE_FILES+=(-f /etc/`basename ${RSTUDIO_DOCKER_COMPOSE}`) + cat /etc/`basename ${RSTUDIO_DOCKER_COMPOSE}` + fi + + retry 5 docker-compose "${COMPOSE_FILES[@]}" config + retry 5 docker-compose "${COMPOSE_FILES[@]}" pull + retry 5 docker-compose "${COMPOSE_FILES[@]}" up -d + + # Start up crypto detector, if enabled. + # This should be started after other containers. + # Use `docker run` instead of docker-compose so we can link it to the Jupyter/RStudio container's network. + # See https://github.com/broadinstitute/terra-cryptomining-security-alerts/tree/master/v2 + if [ ! -z "$CRYPTO_DETECTOR_DOCKER_IMAGE" ] ; then + docker run --name=${CRYPTO_DETECTOR_SERVER_NAME} --rm -d \ + --net=container:${TOOL_SERVER_NAME} ${CRYPTO_DETECTOR_DOCKER_IMAGE} + fi + + STEP_TIMINGS+=($(date +%s)) + + # Jupyter-specific setup, only do if Jupyter is installed + if [ ! -z ${JUPYTER_DOCKER_IMAGE} ] ; then + log 'Installing Jupydocker kernelspecs...' + + # Install notebook.json + if [ ! -z ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} ] ; then + log 'Copy Jupyter frontend notebook config...' 
+ gsutil cp ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI} /etc + JUPYTER_NOTEBOOK_FRONTEND_CONFIG=`basename ${JUPYTER_NOTEBOOK_FRONTEND_CONFIG_URI}` + retry 3 docker exec -u root ${JUPYTER_SERVER_NAME} /bin/bash -c "mkdir -p $JUPYTER_HOME/nbconfig" + docker cp /etc/${JUPYTER_NOTEBOOK_FRONTEND_CONFIG} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/nbconfig/ + fi + + STEP_TIMINGS+=($(date +%s)) + + # Install NbExtensions. These are user-specified Jupyter extensions. + # For instance Terra UI is passing + # { + # "nbExtensions": { + # "saturn-iframe-extension": "https://bvdp-saturn-dev.appspot.com/jupyter-iframe-extension.js" + # }, + # "labExtensions": {}, + # "serverExtensions": {}, + # "combinedExtensions": {} + # } + if [ ! -z "${JUPYTER_NB_EXTENSIONS}" ] ; then + for ext in ${JUPYTER_NB_EXTENSIONS} + do + log "Installing Jupyter NB extension [$ext]..." # double-quoted so $ext is expanded in the log message + if [[ $ext == 'gs://'* ]]; then + gsutil cp $ext /etc + JUPYTER_EXTENSION_ARCHIVE=`basename $ext` + docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then + JUPYTER_EXTENSION_FILE=`basename $ext` + curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} + docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_notebook_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} + else + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_notebook_extension.sh $ext + fi + done + fi + + STEP_TIMINGS+=($(date +%s)) + + # Install serverExtensions if provided by the user + if [ ! 
-z "${JUPYTER_SERVER_EXTENSIONS}" ] ; then + for ext in ${JUPYTER_SERVER_EXTENSIONS} + do + log "Installing Jupyter server extension [$ext]..." # double-quoted so $ext is expanded in the log message + if [[ $ext == 'gs://'* ]]; then + gsutil cp $ext /etc + JUPYTER_EXTENSION_ARCHIVE=`basename $ext` + docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_server_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + else + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_server_extension.sh $ext + fi + done + fi + + STEP_TIMINGS+=($(date +%s)) + + # Install combined extensions if provided by the user + if [ ! -z "${JUPYTER_COMBINED_EXTENSIONS}" ] ; then + for ext in ${JUPYTER_COMBINED_EXTENSIONS} + do + log "Installing Jupyter combined extension [$ext]..." # double-quoted so $ext is expanded in the log message + log $ext + if [[ $ext == 'gs://'* ]]; then + gsutil cp $ext /etc + JUPYTER_EXTENSION_ARCHIVE=`basename $ext` + docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_combined_extension.sh ${JUPYTER_EXTENSION_ARCHIVE} + else + retry 3 docker exec -u root -e PIP_USER=false ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_pip_install_combined_extension.sh $ext + fi + done + fi + + STEP_TIMINGS+=($(date +%s)) + + # If a user script was specified, copy it into the docker container and execute it. + if [ ! -z "$USER_SCRIPT_URI" ] ; then + apply_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME + fi + + # done user script + STEP_TIMINGS+=($(date +%s)) + + # If a start user script was specified, copy it into the docker container for consumption during startups. + if [ ! 
-z "$START_USER_SCRIPT_URI" ] ; then + apply_start_user_script $JUPYTER_SERVER_NAME $JUPYTER_HOME + fi + + # done start user script + STEP_TIMINGS+=($(date +%s)) + + # Install lab extensions if provided by the user + # Note: lab extensions need to be installed as jupyter user, not root + if [ ! -z "${JUPYTER_LAB_EXTENSIONS}" ] ; then + for ext in ${JUPYTER_LAB_EXTENSIONS} + do + log "Installing JupyterLab extension [$ext]..." # double-quoted so $ext is expanded in the log message + pwd + if [[ $ext == 'gs://'* ]]; then + gsutil cp -r $ext /etc + JUPYTER_EXTENSION_ARCHIVE=`basename $ext` + docker cp /etc/${JUPYTER_EXTENSION_ARCHIVE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_ARCHIVE} + elif [[ $ext == 'http://'* || $ext == 'https://'* ]]; then + JUPYTER_EXTENSION_FILE=`basename $ext` + curl $ext -o /etc/${JUPYTER_EXTENSION_FILE} + docker cp /etc/${JUPYTER_EXTENSION_FILE} ${JUPYTER_SERVER_NAME}:${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} + retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh ${JUPYTER_HOME}/${JUPYTER_EXTENSION_FILE} + else + retry 3 docker exec ${JUPYTER_SERVER_NAME} ${JUPYTER_SCRIPTS}/extension/jupyter_install_lab_extension.sh $ext + fi + done + fi + + STEP_TIMINGS+=($(date +%s)) + + # See IA-1901: Jupyter UI stalls indefinitely on initial R kernel connection after cluster create/resume + # The intent of this is to "warm up" R at VM creation time to hopefully prevent issues when the Jupyter + # kernel tries to connect to it. 
+ docker exec $JUPYTER_SERVER_NAME /bin/bash -c "R -e '1+1'" || true + + # jupyter_delocalize.py now assumes welder's url is `http://welder:8080`, but on dataproc, we're still using host network + # A better way to do this might be to take welder host as an argument to the script + docker exec $JUPYTER_SERVER_NAME /bin/bash -c "sed -i 's/http:\/\/welder/http:\/\/127.0.0.1/g' /etc/jupyter/custom/jupyter_delocalize.py" + + # In new jupyter images, we should update jupyter_notebook_config.py in terra-docker. + # This is to make it so that older images will still work after we change notebooks location to home dir + docker exec ${JUPYTER_SERVER_NAME} sed -i '/^# to mount there as it effectively deletes existing files on the image/,+5d' ${JUPYTER_HOME}/jupyter_notebook_config.py + + # Copy gitignore into jupyter container + docker exec $JUPYTER_SERVER_NAME /bin/bash -c "wget https://raw.githubusercontent.com/DataBiosphere/terra-docker/045a139dbac19fbf2b8c4080b8bc7fff7fc8b177/terra-jupyter-aou/gitignore_global" + + # Install nbstripout and set gitignore in Git Config + docker exec $JUPYTER_SERVER_NAME /bin/bash -c "pip install nbstripout \ + && python -m nbstripout --install --global \ + && git config --global core.excludesfile $JUPYTER_USER_HOME/gitignore_global" + + # Install the custom jupyter extensions needed to lock notebooks into edit or safe modes (required by AOU) + docker exec -u 0 $JUPYTER_SERVER_NAME /bin/bash -c "$JUPYTER_HOME/scripts/extension/install_jupyter_contrib_nbextensions.sh \ + && mkdir -p $JUPYTER_USER_HOME/.jupyter/custom/ \ + && cp $JUPYTER_HOME/custom/google_sign_in.js $JUPYTER_USER_HOME/.jupyter/custom/ \ + && ls -la $JUPYTER_HOME/custom/extension_entry_jupyter.js \ + && cp $JUPYTER_HOME/custom/extension_entry_jupyter.js $JUPYTER_USER_HOME/.jupyter/custom/custom.js \ + && cp $JUPYTER_HOME/custom/safe-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ + && cp $JUPYTER_HOME/custom/edit-mode.js $JUPYTER_USER_HOME/.jupyter/custom/ \ + && mkdir -p 
$JUPYTER_HOME/nbconfig" + + log 'Starting Jupyter Notebook...' + retry 3 docker exec -d ${JUPYTER_SERVER_NAME} /bin/bash -c "${JUPYTER_SCRIPTS}/run-jupyter.sh ${NOTEBOOKS_DIR}" + + STEP_TIMINGS+=($(date +%s)) + fi + + # RStudio specific setup; only do if RStudio is installed + if [ ! -z "$RSTUDIO_DOCKER_IMAGE" ] ; then + EXIT_CODE=0 + retry 3 docker exec ${RSTUDIO_SERVER_NAME} ${RSTUDIO_SCRIPTS}/set_up_package_dir.sh || EXIT_CODE=$? + if [ $EXIT_CODE -ne 0 ]; then + echo "RStudio user package installation directory creation failed, creating /packages directory" + docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c "mkdir -p ${RSTUDIO_USER_HOME}/packages && chmod a+rwx ${RSTUDIO_USER_HOME}/packages" + fi + + # Add the EVs specified in rstudio-docker-compose.yaml to Renviron.site + retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'echo "GOOGLE_PROJECT=$GOOGLE_PROJECT +CLUSTER_NAME=$CLUSTER_NAME +RUNTIME_NAME=$RUNTIME_NAME +OWNER_EMAIL=$OWNER_EMAIL +SHOULD_BACKGROUND_SYNC=$SHOULD_BACKGROUND_SYNC" >> /usr/local/lib/R/etc/Renviron.site' + + # Add custom_env_vars.env to Renviron.site + CUSTOM_ENV_VARS_FILE=/var/custom_env_vars.env + if [ -f "$CUSTOM_ENV_VARS_FILE" ]; then + retry 3 docker cp ${CUSTOM_ENV_VARS_FILE} ${RSTUDIO_SERVER_NAME}:/usr/local/lib/R/var/custom_env_vars.env + retry 3 docker exec ${RSTUDIO_SERVER_NAME} /bin/bash -c 'cat /usr/local/lib/R/var/custom_env_vars.env >> /usr/local/lib/R/etc/Renviron.site' + fi + + # If a user script was specified, copy it into the docker container and execute it. + if [ ! -z "$USER_SCRIPT_URI" ] ; then + apply_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS + fi + + # If a start user script was specified, copy it into the docker container for consumption during startups. + if [ ! 
-z "$START_USER_SCRIPT_URI" ] ; then + apply_start_user_script $RSTUDIO_SERVER_NAME $RSTUDIO_SCRIPTS + fi + + # Start RStudio server + retry 3 docker exec -d ${RSTUDIO_SERVER_NAME} /init + fi + + # Remove any unneeded cached images to save disk space. + # Do this asynchronously so it doesn't hold up cluster creation + log 'Pruning docker images...' + docker image prune -a -f & +fi + +log 'All done!' +log "Timings: ${STEP_TIMINGS[@]}" \ No newline at end of file diff --git a/http/src/main/resources/reference.conf b/http/src/main/resources/reference.conf index 2f5d4f5f599..635d0949aa8 100644 --- a/http/src/main/resources/reference.conf +++ b/http/src/main/resources/reference.conf @@ -70,9 +70,10 @@ dataproc { } # Cached dataproc 2.2.x image used by Terra - customDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-2-52-debian12-2025-05-01-21-02-02" - # Cached dataproc 2.1.x image used by AOU - legacyAouCustomDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-0-51-debian10-2023-12-05-21-47-12" + customDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-2-52-debian12-2025-05-05-17-09-59" + # AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod + # Cached dataproc 2.1.x image used by AOU + legacyAouCustomDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-1-11-debian11-2025-05-05-17-17-52" # The ratio of memory allocated to spark. 0.8 = 80%. # Hail/Spark users generally allocate 80% of the ram to the JVM. 
@@ -113,7 +114,7 @@ dataproc { } gce { - customGceImage = "projects/broad-dsp-gcr-public/global/images/leo-gce-image-2025-05-02-18-01-22" + customGceImage = "projects/broad-dsp-gcr-public/global/images/leo-gce-image-2025-05-05-17-10-22" userDiskDeviceName = "user-disk" defaultScopes = [ "https://www.googleapis.com/auth/userinfo.email", @@ -953,6 +954,8 @@ gceClusterResources { clusterResources { initScript = "init-actions.sh" + # AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod + legacyAOUInitScript = "init-actions-aou-dataproc-21.sh" startupScript = "startup.sh" shutdownScript = "shutdown.sh" jupyterDockerCompose = "jupyter-docker-compose.yaml" diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala index 8955524efa6..e8420582bf3 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala @@ -2,6 +2,7 @@ package org.broadinstitute.dsde.workbench.leonardo package config case class ClusterResourcesConfig(initScript: RuntimeResource, + legacyAOUInitScript: RuntimeResource, cloudInit: Option[RuntimeResource], startupScript: RuntimeResource, shutdownScript: RuntimeResource, diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala index 25db97e0248..99f0021cbbf 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala @@ -124,6 +124,7 @@ object Config { DataprocConfig( config.getStringList("defaultScopes").asScala.toSet, config.as[DataprocCustomImage]("customDataprocImage"), + // AN-503: Delete 
once AOU has switched to using Dataproc 2.2.X in prod config.as[DataprocCustomImage]("legacyAouCustomDataprocImage"), config.getAs[Double]("sparkMemoryConfigRatio"), config.getAs[Double]("minimumRuntimeMemoryInGb"), @@ -200,6 +201,7 @@ object Config { config => ClusterResourcesConfig( config.as[RuntimeResource]("initScript"), + config.as[RuntimeResource]("legacyAOUInitScript"), config.getAs[RuntimeResource]("cloudInit"), config.as[RuntimeResource]("startupScript"), config.as[RuntimeResource]("shutdownScript"), diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/DataprocConfig.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/DataprocConfig.scala index a4bea7bc594..8aedbb9a029 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/DataprocConfig.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/DataprocConfig.scala @@ -7,6 +7,7 @@ import org.broadinstitute.dsde.workbench.leonardo.CustomImage.DataprocCustomImag final case class DataprocConfig( defaultScopes: Set[String], customDataprocImage: DataprocCustomImage, + // AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod legacyAouCustomDataprocImage: DataprocCustomImage, sparkMemoryConfigRatio: Option[Double], minimumRuntimeMemoryInGb: Option[Double], diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/BucketHelper.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/BucketHelper.scala index 3e130e072b0..5e920f5c30a 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/BucketHelper.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/BucketHelper.scala @@ -209,6 +209,7 @@ class BucketHelper[F[_]]( .emits( List( clusterResourcesConfig.initScript, + clusterResourcesConfig.legacyAOUInitScript, clusterResourcesConfig.jupyterNotebookFrontendConfigUri ) ) diff --git 
a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala index bfbf88e049d..42e6cf2a054 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/DataprocInterpreter.scala @@ -184,19 +184,19 @@ class DataprocInterpreter[F[_]: Parallel]( .compile .drain - // build cluster configuration - initScriptResources = List(config.clusterResourcesConfig.initScript) - initScripts = initScriptResources.map(resource => GcsPath(initBucketName, GcsObjectName(resource.asString))) - - // If we need to support 2 version of dataproc custom image, we'll update this -// dataprocImage = config.dataprocConfig.customDataprocImage + imageUrls = params.runtimeImages.map(_.imageUrl) + // build cluster configuration // We need to maintain the old version of the dataproc image to uncouple the terra from the aou release - imageUrls = params.runtimeImages.map(_.imageUrl) - dataprocImage = + // AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod + (dataprocImage, initScriptResources) = if (imageUrls.contains("us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.13")) - config.dataprocConfig.legacyAouCustomDataprocImage - else config.dataprocConfig.customDataprocImage + (config.dataprocConfig.legacyAouCustomDataprocImage, + List(config.clusterResourcesConfig.legacyAOUInitScript) + ) + else (config.dataprocConfig.customDataprocImage, List(config.clusterResourcesConfig.initScript)) + + initScripts = initScriptResources.map(resource => GcsPath(initBucketName, GcsObjectName(resource.asString))) // If the cluster is configured with worker private access, then specify the // `leonardo-private` network tag. 
This tag will be removed from the master node diff --git a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/CommonTestData.scala b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/CommonTestData.scala index 82b6e97c038..cdcce4ca6a5 100644 --- a/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/CommonTestData.scala +++ b/http/src/test/scala/org/broadinstitute/dsde/workbench/leonardo/CommonTestData.scala @@ -236,6 +236,7 @@ object CommonTestData { RuntimeImage(Proxy, imageConfig.proxyImage.imageUrl, None, Instant.now.truncatedTo(ChronoUnit.MICROS)) val customDataprocImage = RuntimeImage(BootSource, "custom_dataproc", None, Instant.now.truncatedTo(ChronoUnit.MICROS)) + // AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod val legacyAouCustomDataprocImage = RuntimeImage(BootSource, "legacy_aou_custom_dataproc", None, Instant.now.truncatedTo(ChronoUnit.MICROS)) val cryptoDetectorImage = From ae0172549684d06b15f4358062e0aab403837ebb Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Mon, 5 May 2025 16:10:20 -0400 Subject: [PATCH 27/35] easier to also set up the legacy init script for the gce cluster config --- http/src/main/resources/reference.conf | 3 +++ 1 file changed, 3 insertions(+) diff --git a/http/src/main/resources/reference.conf b/http/src/main/resources/reference.conf index 635d0949aa8..5940ae274f5 100644 --- a/http/src/main/resources/reference.conf +++ b/http/src/main/resources/reference.conf @@ -938,6 +938,9 @@ welder { # cluster scripts and config gceClusterResources { initScript = "gce-init.sh" + # AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod.
+ # This is a placeholder to avoid making the rest of the code more complicated + legacyAOUInitScript = "gce-init.sh" cloudInit = "cloud-init.yml" startupScript = "startup.sh" shutdownScript = "shutdown.sh" From 47104eff6fc53f75ac72ea5ddfd4ce7813edd7cd Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Mon, 5 May 2025 16:15:32 -0400 Subject: [PATCH 28/35] fix legacy init script name typo --- ...-actions-aou-dataproc21.sh => init-actions-aou-dataproc-21.sh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename http/src/main/resources/init-resources/{init-actions-aou-dataproc21.sh => init-actions-aou-dataproc-21.sh} (100%) diff --git a/http/src/main/resources/init-resources/init-actions-aou-dataproc21.sh b/http/src/main/resources/init-resources/init-actions-aou-dataproc-21.sh similarity index 100% rename from http/src/main/resources/init-resources/init-actions-aou-dataproc21.sh rename to http/src/main/resources/init-resources/init-actions-aou-dataproc-21.sh From c239e54611f98a60a5d859108a7ad7acaeecb454 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Mon, 5 May 2025 16:57:21 -0400 Subject: [PATCH 29/35] addressing comment from Lucy and adding link to aou prod version --- http/src/main/resources/init-resources/init-actions.sh | 10 ---------- .../prepare-custom-leonardo-jupyter-dataproc-image.sh | 1 + jenkins/gce-custom-images/prepare_gce_image.sh | 2 +- 3 files changed, 2 insertions(+), 11 deletions(-) diff --git a/http/src/main/resources/init-resources/init-actions.sh b/http/src/main/resources/init-resources/init-actions.sh index 96ec09fe2ae..6745cf3886e 100644 --- a/http/src/main/resources/init-resources/init-actions.sh +++ b/http/src/main/resources/init-resources/init-actions.sh @@ -147,16 +147,6 @@ STEP_TIMINGS=($(date +%s)) # opsagent.sh which uses the built-in configuration of Ops Agent. # See https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/configuration#default. # -# Detect dataproc image version from its various names -if (! 
test -v DATAPROC_IMAGE_VERSION) && test -v DATAPROC_VERSION; then - DATAPROC_IMAGE_VERSION="${DATAPROC_VERSION}" -fi - -if [[ $(echo "${DATAPROC_IMAGE_VERSION} < 2.2" | bc -l) == 1 ]]; then - echo "This Dataproc cluster node runs image version ${DATAPROC_IMAGE_VERSION} with pre-installed legacy monitoring agent. Skipping Ops Agent installation." - exit 0 -fi - curl -sSO https://dl.google.com/cloudagents/add-google-cloud-ops-agent-repo.sh bash add-google-cloud-ops-agent-repo.sh --also-install diff --git a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh index 226e2bba095..822e0d19c1b 100755 --- a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh +++ b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh @@ -29,6 +29,7 @@ anvil_rstudio_bioconductor="us.gcr.io/broad-dsp-gcr-public/anvil-rstudio-biocond # Note that this is the version used currently by AOU in production, the one above can be staged for testing # AN-503: Note that AOU 2.2.13 is using a hail version that still requires dataproc 2.1.x +# You can check which version of the AOU image is used in prod here: https://github.com/all-of-us/workbench/blob/main/api/config/config_prod.json#L15C1-L16C1 terra_jupyter_aou_old="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.13" # If you change this you must also change Leo reference.conf! 
diff --git a/jenkins/gce-custom-images/prepare_gce_image.sh b/jenkins/gce-custom-images/prepare_gce_image.sh index a7dab5e6915..b8858f689db 100755 --- a/jenkins/gce-custom-images/prepare_gce_image.sh +++ b/jenkins/gce-custom-images/prepare_gce_image.sh @@ -26,7 +26,7 @@ openidc_proxy="broadinstitute/openidc-proxy:2.3.1_2" anvil_rstudio_bioconductor="us.gcr.io/broad-dsp-gcr-public/anvil-rstudio-bioconductor:3.20.1" # Note that this is the version used currently by AOU in production, the one above can be staged for testing -# AN-503: Note that AOU 2.2.13 is using a hail version that still requires dataproc 2.1.x +# You can check which version of the AOU image is used in prod here: https://github.com/all-of-us/workbench/blob/main/api/config/config_prod.json#L15C1-L16C1 terra_jupyter_aou_old="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.13" cos_gpu_installer="gcr.io/cos-cloud/cos-gpu-installer:v2.1.9" From 32fe66459d5bcc2ecb12212cf6d69e9b0f22ec8a Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Tue, 6 May 2025 09:53:37 -0400 Subject: [PATCH 30/35] remove old aou image from dataproc 2.2 --- .../prepare-custom-leonardo-jupyter-dataproc-image.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh index 822e0d19c1b..faa7d96948d 100755 --- a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh +++ b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh @@ -35,9 +35,12 @@ terra_jupyter_aou_old="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.13" # If you change this you must also change Leo reference.conf! 
cryptomining_detector="us.gcr.io/broad-dsp-gcr-public/cryptomining-detector:0.0.2" -# this array determines which of the above images are baked into the custom image +# this array determines which of the above images are baked into the custom dataproc 2.2.x image # the entry must match the var name above, which must correspond to a valid docker URI -docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou terra_jupyter_aou_old openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" +docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" + +# Comment the above and uncomment this to create the dataproc 2.1.x image +# docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou_old openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" # The version of python to install # Note: this should match the version of python in the terra-jupyter-hail image. 
From 89bfb683a09da8d3b775abfa169acf001049a276 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Tue, 6 May 2025 10:28:11 -0400 Subject: [PATCH 31/35] remove new aou image from dataproc 2.1 --- .../prepare-custom-leonardo-jupyter-dataproc-image.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh index faa7d96948d..c23c85bcc61 100755 --- a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh +++ b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh @@ -37,10 +37,10 @@ cryptomining_detector="us.gcr.io/broad-dsp-gcr-public/cryptomining-detector:0.0. # this array determines which of the above images are baked into the custom dataproc 2.2.x image # the entry must match the var name above, which must correspond to a valid docker URI -docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" +# docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" # Comment the above and uncomment this to create the dataproc 2.1.x image -# docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou_old openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" +docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou_old openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" # The version of python to install # Note: this 
should match the version of python in the terra-jupyter-hail image. From e5890627897715076208000c842dde8edc2e8839 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Tue, 6 May 2025 10:55:50 -0400 Subject: [PATCH 32/35] test with smaller dataproc 2 image --- http/src/main/resources/reference.conf | 2 +- .../prepare-custom-leonardo-jupyter-dataproc-image.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/http/src/main/resources/reference.conf b/http/src/main/resources/reference.conf index 5940ae274f5..d37577698e3 100644 --- a/http/src/main/resources/reference.conf +++ b/http/src/main/resources/reference.conf @@ -70,7 +70,7 @@ dataproc { } # Cached dataproc 2.2.x image used by Terra - customDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-2-52-debian12-2025-05-05-17-09-59" + customDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-2-52-debian12-2025-05-06-13-58-25" # AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod # Cached dataproc 2.1.x image used by AOU legacyAouCustomDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-1-11-debian11-2025-05-05-17-17-52" diff --git a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh index c23c85bcc61..faa7d96948d 100755 --- a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh +++ b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh @@ -37,10 +37,10 @@ cryptomining_detector="us.gcr.io/broad-dsp-gcr-public/cryptomining-detector:0.0. 
# this array determines which of the above images are baked into the custom dataproc 2.2.x image # the entry must match the var name above, which must correspond to a valid docker URI -# docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" +docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" # Comment the above and uncomment this to create the dataproc 2.1.x image -docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou_old openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" +# docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou_old openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" # The version of python to install # Note: this should match the version of python in the terra-jupyter-hail image. 
From 7009a1eccf4cb02cf76cfaa8e97cc2391d64a474 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Tue, 6 May 2025 12:06:32 -0400 Subject: [PATCH 33/35] forgot to point to dataproc 2.1 version --- jenkins/dataproc-custom-images/create_dataproc_image.sh | 4 ++-- .../prepare-custom-leonardo-jupyter-dataproc-image.sh | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/jenkins/dataproc-custom-images/create_dataproc_image.sh b/jenkins/dataproc-custom-images/create_dataproc_image.sh index 1eb29ce7467..54bdb3dda7b 100755 --- a/jenkins/dataproc-custom-images/create_dataproc_image.sh +++ b/jenkins/dataproc-custom-images/create_dataproc_image.sh @@ -27,7 +27,7 @@ TEST_BUCKET="gs://leo-dataproc-image-creation-logs" pushd $WORK_DIR DATAPROC_BASE_NAME="leo-dataproc-image" -DP_VERSION_FORMATTED="2-2-52-debian12" +DP_VERSION_FORMATTED="2-1-11-debian11" # This needs to be unique for each run IMAGE_ID=$(date +"%Y-%m-%d-%H-%M-%S") OUTPUT_IMAGE_NAME="$DATAPROC_BASE_NAME-$DP_VERSION_FORMATTED-$IMAGE_ID" @@ -36,7 +36,7 @@ gcloud config set dataproc/region us-central1 python generate_custom_image.py \ --image-name "$OUTPUT_IMAGE_NAME" \ - --dataproc-version "2.2.52-debian12" \ + --dataproc-version "2.1.11-debian11" \ --customization-script ../prepare-custom-leonardo-jupyter-dataproc-image.sh \ --zone $ZONE \ --gcs-bucket $DATAPROC_IMAGE_BUCKET \ diff --git a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh index faa7d96948d..cc3f0212afd 100755 --- a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh +++ b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh @@ -35,12 +35,15 @@ terra_jupyter_aou_old="us.gcr.io/broad-dsp-gcr-public/terra-jupyter-aou:2.2.13" # If you change this you must also change Leo reference.conf! 
cryptomining_detector="us.gcr.io/broad-dsp-gcr-public/cryptomining-detector:0.0.2" -# this array determines which of the above images are baked into the custom dataproc 2.2.x image +# This array determines which of the above images are baked into the custom dataproc 2.2.x image # the entry must match the var name above, which must correspond to a valid docker URI -docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" +# docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" # Comment the above and uncomment this to create the dataproc 2.1.x image -# docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou_old openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" +# You would also need to revert the dataproc versions in the create_dataproc_image.sh like this: +# DP_VERSION_FORMATTED="2-1-11-debian11" +# --dataproc-version "2.1.11-debian11" +docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou_old openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" # The version of python to install # Note: this should match the version of python in the terra-jupyter-hail image. From f6d5f96db7d37a4d6c51a8f7742870ffc1ebb591 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Tue, 6 May 2025 13:41:05 -0400 Subject: [PATCH 34/35] cleanup, time to test! 
--- http/src/main/resources/reference.conf | 2 +- .../workbench/leonardo/config/ClusterResourcesConfig.scala | 2 +- .../dsde/workbench/leonardo/config/Config.scala | 1 + .../dsde/workbench/leonardo/util/BucketHelper.scala | 1 + jenkins/dataproc-custom-images/create_dataproc_image.sh | 4 ++-- .../prepare-custom-leonardo-jupyter-dataproc-image.sh | 4 ++-- 6 files changed, 8 insertions(+), 6 deletions(-) diff --git a/http/src/main/resources/reference.conf b/http/src/main/resources/reference.conf index d37577698e3..814de241770 100644 --- a/http/src/main/resources/reference.conf +++ b/http/src/main/resources/reference.conf @@ -73,7 +73,7 @@ dataproc { customDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-2-52-debian12-2025-05-06-13-58-25" # AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod # Cached dataproc 2.1.x image used by AOU - legacyAouCustomDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-1-11-debian11-2025-05-05-17-17-52" + legacyAouCustomDataprocImage = "projects/broad-dsp-gcr-public/global/images/leo-dataproc-image-2-1-11-debian11-2025-05-06-16-08-29" # The ratio of memory allocated to spark. 0.8 = 80%. # Hail/Spark users generally allocate 80% of the ram to the JVM. 
diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala index e8420582bf3..e1afd2e5952 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala @@ -2,7 +2,7 @@ package org.broadinstitute.dsde.workbench.leonardo package config case class ClusterResourcesConfig(initScript: RuntimeResource, - legacyAOUInitScript: RuntimeResource, + legacyAOUInitScript: RuntimeResource, // AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod cloudInit: Option[RuntimeResource], startupScript: RuntimeResource, shutdownScript: RuntimeResource, diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala index 99f0021cbbf..5a2fd33a6cf 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/Config.scala @@ -201,6 +201,7 @@ object Config { config => ClusterResourcesConfig( config.as[RuntimeResource]("initScript"), + // AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod config.as[RuntimeResource]("legacyAOUInitScript"), config.getAs[RuntimeResource]("cloudInit"), config.as[RuntimeResource]("startupScript"), diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/BucketHelper.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/BucketHelper.scala index 5e920f5c30a..19e28c86e19 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/BucketHelper.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/util/BucketHelper.scala @@ 
-209,6 +209,7 @@ class BucketHelper[F[_]]( .emits( List( clusterResourcesConfig.initScript, + // AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod clusterResourcesConfig.legacyAOUInitScript, clusterResourcesConfig.jupyterNotebookFrontendConfigUri ) diff --git a/jenkins/dataproc-custom-images/create_dataproc_image.sh b/jenkins/dataproc-custom-images/create_dataproc_image.sh index 54bdb3dda7b..1eb29ce7467 100755 --- a/jenkins/dataproc-custom-images/create_dataproc_image.sh +++ b/jenkins/dataproc-custom-images/create_dataproc_image.sh @@ -27,7 +27,7 @@ TEST_BUCKET="gs://leo-dataproc-image-creation-logs" pushd $WORK_DIR DATAPROC_BASE_NAME="leo-dataproc-image" -DP_VERSION_FORMATTED="2-1-11-debian11" +DP_VERSION_FORMATTED="2-2-52-debian12" # This needs to be unique for each run IMAGE_ID=$(date +"%Y-%m-%d-%H-%M-%S") OUTPUT_IMAGE_NAME="$DATAPROC_BASE_NAME-$DP_VERSION_FORMATTED-$IMAGE_ID" @@ -36,7 +36,7 @@ gcloud config set dataproc/region us-central1 python generate_custom_image.py \ --image-name "$OUTPUT_IMAGE_NAME" \ - --dataproc-version "2.1.11-debian11" \ + --dataproc-version "2.2.52-debian12" \ --customization-script ../prepare-custom-leonardo-jupyter-dataproc-image.sh \ --zone $ZONE \ --gcs-bucket $DATAPROC_IMAGE_BUCKET \ diff --git a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh index cc3f0212afd..d66ec614e82 100755 --- a/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh +++ b/jenkins/dataproc-custom-images/prepare-custom-leonardo-jupyter-dataproc-image.sh @@ -37,13 +37,13 @@ cryptomining_detector="us.gcr.io/broad-dsp-gcr-public/cryptomining-detector:0.0. 
# This array determines which of the above images are baked into the custom dataproc 2.2.x image # the entry must match the var name above, which must correspond to a valid docker URI -# docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" +docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" # Comment the above and uncomment this to create the dataproc 2.1.x image # You would also need to revert the dataproc versions in the create_dataproc_image.sh like this: # DP_VERSION_FORMATTED="2-1-11-debian11" # --dataproc-version "2.1.11-debian11" -docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou_old openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" +#docker_image_var_names="welder_server terra_jupyter_python terra_jupyter_r terra_jupyter_bioconductor terra_jupyter_hail terra_jupyter_gatk terra_jupyter_aou_old openidc_proxy anvil_rstudio_bioconductor cryptomining_detector" # The version of python to install # Note: this should match the version of python in the terra-jupyter-hail image. 
From 7c9abd78376a659a01a9db03a945adfd3872005f Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Tue, 6 May 2025 13:51:12 -0400 Subject: [PATCH 35/35] fix formatting --- .../config/ClusterResourcesConfig.scala | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala index e1afd2e5952..9313ad68b40 100644 --- a/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala +++ b/http/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/ClusterResourcesConfig.scala @@ -1,20 +1,21 @@ package org.broadinstitute.dsde.workbench.leonardo package config -case class ClusterResourcesConfig(initScript: RuntimeResource, - legacyAOUInitScript: RuntimeResource, // AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod - cloudInit: Option[RuntimeResource], - startupScript: RuntimeResource, - shutdownScript: RuntimeResource, - jupyterDockerCompose: RuntimeResource, - gpuDockerCompose: Option[RuntimeResource], // only applies to GCE runtimes - rstudioDockerCompose: RuntimeResource, - proxyDockerCompose: RuntimeResource, - welderDockerCompose: RuntimeResource, - proxySiteConf: RuntimeResource, - jupyterNotebookConfigUri: RuntimeResource, - jupyterNotebookFrontendConfigUri: RuntimeResource, - customEnvVarsConfigUri: RuntimeResource +case class ClusterResourcesConfig( + initScript: RuntimeResource, + legacyAOUInitScript: RuntimeResource, // AN-503: Delete once AOU has switched to using Dataproc 2.2.X in prod + cloudInit: Option[RuntimeResource], + startupScript: RuntimeResource, + shutdownScript: RuntimeResource, + jupyterDockerCompose: RuntimeResource, + gpuDockerCompose: Option[RuntimeResource], // only applies to GCE runtimes + rstudioDockerCompose: RuntimeResource, + proxyDockerCompose: 
RuntimeResource, + welderDockerCompose: RuntimeResource, + proxySiteConf: RuntimeResource, + jupyterNotebookConfigUri: RuntimeResource, + jupyterNotebookFrontendConfigUri: RuntimeResource, + customEnvVarsConfigUri: RuntimeResource ) object ClusterResourcesConfig {