From 519c9e1501c217726e39dda25b5bbcf1f1aa3054 Mon Sep 17 00:00:00 2001 From: Harika Date: Mon, 2 Feb 2026 16:37:01 -0600 Subject: [PATCH 01/35] adding ubuntu deployment scripts Signed-off-by: Harika --- .../EI/single-node/troubleshooting.md | 355 ++++++++++ .../EI/single-node/user-guide-apisix.md | 301 +++++++++ .../EI/single-node/user-guide-genai.md | 262 +++++++ third_party/Dell/ubuntu-22.04/iac/README.md | 235 +++++++ .../iac/deploy-enterprise-inference.sh | 638 ++++++++++++++++++ third_party/Dell/ubuntu-22.04/iac/main.tf | 118 ++++ .../Dell/ubuntu-22.04/iac/mount-iso.sh | 188 ++++++ .../Dell/ubuntu-22.04/iac/terraform.tfvars | 6 + .../Dell/ubuntu-22.04/iac/variables.tf | 80 +++ .../ubuntu-22.04/iac/verify-installation.sh | 99 +++ 10 files changed, 2282 insertions(+) create mode 100644 third_party/Dell/ubuntu-22.04/EI/single-node/troubleshooting.md create mode 100644 third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md create mode 100644 third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md create mode 100644 third_party/Dell/ubuntu-22.04/iac/README.md create mode 100644 third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh create mode 100644 third_party/Dell/ubuntu-22.04/iac/main.tf create mode 100644 third_party/Dell/ubuntu-22.04/iac/mount-iso.sh create mode 100644 third_party/Dell/ubuntu-22.04/iac/terraform.tfvars create mode 100644 third_party/Dell/ubuntu-22.04/iac/variables.tf create mode 100644 third_party/Dell/ubuntu-22.04/iac/verify-installation.sh diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/troubleshooting.md b/third_party/Dell/ubuntu-22.04/EI/single-node/troubleshooting.md new file mode 100644 index 00000000..90bb4bd7 --- /dev/null +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/troubleshooting.md @@ -0,0 +1,355 @@ +# Troubleshooting Guide + +This section provides common deployment and runtime issues observed during Intel® AI for Enterprise Inference setup — along with step-by-step resolutions. 
+ +**Issues:** + 1. [Missing Default User](#1-ansible-deployment-failure--missing-default-user) + 2. [Authorization or sudo Password Failure](#2-authorization-or-sudo-password-failure) + 3. [Configuration Mismatch (Wrong Parameters)](#3-configuration-mismatch-wrong-parameters) + 4. [Kubernetes Cluster Not Reachable](#4-kubernetes-cluster-not-reachable) + 5. [Habana Device Plugin CrashLoopBackOff](#5-habana-device-plugin-crashloopbackoff) + 6. [Model pods remain in "Pending" state](#6-model-pods-remain-in-pending-state) + 7. [Models' Output is Garbled and/or Model Pods Failing](#7-models-output-is-garbled-andor-model-pods-failing) + 8. [Model Deployment Failure with Padding-aware scheduling](#8-model-deployment-failure-with-padding-aware-scheduling) + 9. [Inference Stack Deploy Keycloak System Error](#9-inference-stack-deploy-keycloak-system-error) + 10. [Kubernetes pods failing with "disk pressure"](#10-kubernetes-pods-failing-with-disk-pressure) + 11. [Hugging face authentication failure](#11-Hugging-face-authentication-failure) + 12. [Docker Image Pull Failure](#12-Docker-Image-Pull-Failure) + 13. [Triton Package Compatibility Issue](#13-triton-package-compatibility-issue) +--- + +### 1. Ansible Deployment Failure — Missing Default User + +TASK [download : Prep_download | Create staging directory on remote node] +fatal: [master1]: FAILED! => {"msg": "chown failed: failed to look up user ubuntu"} + + +**Cause:** + +The default Ansible user "ubuntu" does not exist on your system. + +**Fix:** + +Many cloud images create the "ubuntu" user by default, but your system may not have it. Edit the inventory file to change the Ansible user name to your user: +```bash +vi inventory/hosts.yaml +``` + +Update the "ansible_user" with the user that owns Enterprise Inference, in the example below, just "user": + +```bash +all: + hosts: + master1: + ansible_connection: local + ansible_user: user + ansible_become: true +``` + +--- + +### 2. 
Authorization or sudo Password Failure + +Deployment fails with authorization or privilege escalation issues. + +**Fix:** + +Two options: +1. every time, just prior to executing inference-stack-deploy.sh, execute "sudo echo sudoing" and enter your sudo password. This normally will keep your sudo authorization in effect through the execution of inference-stack-deploy.sh. +2. Add `--ask-become-pass` parameter in the inference-stack-deploy.sh script. Specifically, append this flag after `--become-user=root` in the `ansible-playbook` command of `run_reset_playbook()` and `run_fresh_install_playbook()` (lines 821 and 865). NOTE that this will mean the script will wait for input of your sudo password each time it is run. + +--- + +### 3. Configuration Mismatch (Wrong Parameters) + +Deployment fails due to incorrect or missing configuration values. + +**Fix:** +Before re-running deployment, verify and update your inference-config.cfg: +```bash +cluster_url=api.example.com +cert_file=~/certs/cert.pem +key_file=~/certs/key.pem +keycloak_client_id=api +keycloak_admin_user=api-admin +keycloak_admin_password=changeme!! +vault_pass_code=place-holder-123 +deploy_kubernetes_fresh=on +deploy_ingress_controller=on +deploy_keycloak_apisix=on +deploy_genai_gateway=off +deploy_observability=off +deploy_llm_models=on +deploy_ceph=off +deploy_istio=off +``` + +--- + +### 4. Kubernetes Cluster Not Reachable + +Deployment shows “cluster not reachable” or kubectl command failures. + +**Possible Causes & Fixes:** + + - **Cause:** Sudo authorization is not cached + + - **Fix:** Prior to executing inference-stack-deploy.sh, execute any sudo command, such as `sudo echo sudoing`. That will cache your credentials for the time that inference-stack-deploy.sh is executing. 
+ + - **Cause:** Ansible was uninstalled + + - **Fix:** Reinstall manually: + +```bash +sudo apt update +sudo apt install -y ansible +``` + + - **Cause:** Kubernetes configuration mismatch + + - **Fix:** Ensure `~/.kube/config` exists and the context points to the correct cluster. + + - **Cause:** Sudo is stripping the kubectl path from the environment, so kubectl is not found. + + - **Fix:** Ensure that the sudoers file includes the path `/usr/local/bin` in the `secure_path` variable. See the user-guide prerequisites for details. + +--- + +### 5. Habana Device Plugin CrashLoopBackOff + +habana-ai-device-plugin-ds-* CrashLoopBackOff +ERROR: failed detecting Habana's devices on the system: get device name: no habana devices on the system + +**Cause:** +Device plugin unable to detect Gaudi3 PCIe cards. + +**Fix:** +Update your Habana device plugin version. Version 1.22.1-6 is recommended. + +kubectl set image pod/habana-ai-device-plugin-ds-tjbch \ + habana-ai-device-plugin=vault.habana.ai/docker-k8s-device-plugin/docker-k8s-device-plugin:1.22.1-6 + +**Verification:** + +```bash +kubectl get pods -A +``` + +Note: Ensure the habana-ai-device-plugin status changes to Running. + +Check driver/NIC versions hl-smi +Confirm runtime version `dpkg -l +Validate Kubernetes health kubectl get nodes -o wide +Check device plugin logs kubectl logs -n habana-ai-operator + +--- + +### 6. Model Pods Remain in "Pending" State + +Problem: After the inference stack is deployed, model pods remain in the "Pending" state and do not progress to the "Running" state, as shown here: + +```bash +user@master1:~/Enterprise-Inference/core$ kubectl get pods +NAME READY STATUS RESTARTS AGE +keycloak-0 1/1 Running 0 15m +keycloak-postgresql-0 1/1 Running 0 15m +vllm-deepkseek-r1-qwen-32b-64b885895f-dh566 0/1 Pending 0 10m +vllm-llama-8b-786d7678ff-6fr6l 0/1 Pending 0 10m +``` + +This can occur if the habana-ai-operator pod does not identify that the gaudi3 devices are allocatable. 
To check if this is the reason, execute the following command:
+
+```bash
+kubectl describe node master1
+```
+
+Look for the "Capacity" and "Allocatable" sections as below, and ensure that both list the correct number of habana.ai/gaudi devices for your hardware.
+
+```bash
+Capacity:
+ habana.ai/gaudi: 8
+Allocatable:
+ habana.ai/gaudi: 8
+```
+
+If the "Allocatable" section shows zero (0), your pods will remain in the pending state.
+To resolve this, execute the following command to restart the operator so it registers the devices:
+
+```bash
+kubectl rollout restart ds habana-ai-device-plugin-ds -n habana-ai-operator
+```
+
+If the "rollout restart" does not resolve the issue, a system restart often works to fix it.
+
+---
+
+### 7. Models' Output is Garbled and/or Model Pods Failing
+
+IOMMU passthrough is required for Gaudi 3 on **Ubuntu 24.04.2/22.04.5 with Linux kernel 6.8**, and models can produce garbled output or fail if this setting is not applied. Skip this section if a different OS or kernel version is used.
+
+To enable IOMMU passthrough:
+1. Add `GRUB_CMDLINE_LINUX_DEFAULT="iommu=pt intel_iommu=on"` to `/etc/default/grub`.
+2. Run `sudo update-grub`.
+3. Reboot the system.
+
+---
+
+### 8. Model Deployment Failure with Padding-aware scheduling
+
+**Error:** Padding-aware scheduling currently does not work with chunked prefill
+
+**Cause:** This issue occurs when the --use-padding-aware-scheduling flag is enabled while deploying a vLLM model on Habana Gaudi3.
+The current vLLM version (v0.9.0.1+Gaudi-1.22.0) does not support using padding-aware scheduling together with chunked prefill.
+
+**Fix:** If your workload doesn’t require padding-aware scheduling, you can disable it to allow deployment to proceed.
+
+Edit your `gaudi3-values.yaml` file. 
Locate and remove the following flag from the vLLM startup command: +```bash +--use-padding-aware-scheduling +``` + +Redeploy the vLLM Helm chart: +```bash +helm upgrade --install vllm-llama-8b ./core/helm-charts/vllm \ + --values ./core/helm-charts/vllm/gaudi3-values.yaml +``` + +Confirm the pod starts successfully: +```bash +kubectl get pods +kubectl logs -f +``` + +--- + +### 9. Inference Stack Deploy Keycloak System Error + +**Error:** TASK \[Deploy Keycloak System\] FAILED! ... "Failure when executing Helm command ... response status code 429: toomanyrequests: You have reached your unauthenticated pull rate limit." + +**Cause:** This error was seen when attempting a redeployment (running inference_stack_deploy.sh, menu "1) Provision Enterprise Inference Cluster") when the Keycloak service is already installed and the inference_config.cfg "deploy_keycloak_apisix"="on". + +**Fix:** Update inference_config.cfg to change "deploy_keycloak_apisix=on" to "deploy_keycloak_apisix=off" and rerun inference_stack_deploy.sh. + +--- + +### 10. Kubernetes pods failing with "disk pressure" + +If pods are hanging in "pending" state or in CrashLoopBackoff with "disk pressure" messages when examining logs (kubectl logs or kubectl describe pod ), you may be lacking space on a required filesystem. The Enterprise Inference standard installation will use /opt/local-path-provisioner for model local storage. Ensure this location has sufficient space allocated. It is recommended that you undeploy any failing models, allocate more space to the local-path-provisioner, then redeploy your models. + +--- + +### 11. 
Hugging Face authentication failure
+
+**Error:** Deployment fails or hangs when running inference-stack-deploy.sh or while deploying models with the error below
+
+```bash
+su "${USERNAME}" -c "cd /home/${USERNAME}/Enterprise-Inference/core && echo -e '1\n${MODELS}\nyes' | bash ./inference-stack-deploy.sh --models '${MODELS}' --cpu-or-gpu '${GPU_TYPE}' --hugging-face-token ${HUGGINGFACE_TOKEN}"
+```
+**Cause:** The Hugging Face token passed via --hugging-face-token does not match the token stored in inference-config.cfg, or the token has expired / been revoked.
+
+**Fix:**
+
+1. Check that the Hugging Face token has the required permissions for the model you are trying to deploy.
+2. Check if the Hugging Face token is expired. Generate a new Hugging Face token, update your inference-config.cfg, and rerun the inference deployment.
+
+---
+
+### 12. Docker Image Pull Failure
+
+**Error:** During deployment, the image download task fails and retries multiple times:
+```bash
+TASK [download : Download_container | Download image if required]
+FAILED - RETRYING: [master1]: Download_container | Download image if required
+```
+
+**Cause:** Docker Hub enforces pull rate limits for unauthenticated users.
+When multiple images are pulled during Enterprise Inference deployment, the limit may be exceeded, causing HTTP 429 Too Many Requests.
+
+This commonly occurs when:
+
+Re-running deployments multiple times
+
+Deploying on fresh nodes without Docker authentication
+
+Multiple images are pulled in quick succession
+
+**Fix:**
+
+Verify the issue with a manual pull test
+```bash
+sudo ctr -n k8s.io images pull docker.io/library/registry:2.8.1
+```
+
+If this fails with 429 Too Many Requests, Docker Hub rate limiting is confirmed.
+
+**Option A — Authenticate to Docker Hub**
+
+-> Log in to Docker Hub so containerd can pull images with higher limits. 
+```bash +sudo docker login +``` +-> Enter your Docker Hub username and password (or access token). + +-> After login, retry the image pull: +```bash +sudo ctr -n k8s.io images pull docker.io/kubernetesui/metrics-scraper:v1.0.8 +``` + +**Option B — Wait for Rate Limit Reset** + +Docker Hub rate limits typically reset after a few hours. wait 2–4 hours and retry deployment or image pull + +### 13. Triton Package Compatibility Issue + +**Error:** +During model deployment, the inference service may fail to start and worker processes may exit unexpectedly with an error similar to: + +> RuntimeError: Worker failed with error *module `triton` has no attribute `next_power_of_2`*. + +**Cause:** +This issue is caused by a compatibility mismatch between the Triton package and the vLLM execution path used during model deployment. It commonly occurs when deploying models using vLLM with default parameter, when Triton is present but does not fully support the required execution path, or when deployments target CPU or accelerator-based platforms (including Gaudi) without platform-specific tuning. As a result, +vLLM workers fail during initialization and the inference engine does not reach a ready state. + +**Fix:** +Apply the Intel-recommended environment variables and command-line parameters during model deployment to ensure vLLM uses a compatible execution path. 
+ +**Environment Variables (YAML):** +```yaml +VLLM_CPU_KVCACHE_SPACE: "40" +VLLM_RPC_TIMEOUT: "100000" +VLLM_ALLOW_LONG_MAX_MODEL_LEN: "1" +VLLM_ENGINE_ITERATION_TIMEOUT_S: "120" +VLLM_CPU_NUM_OF_RESERVED_CPU: "0" +VLLM_CPU_SGL_KERNEL: "1" +HF_HUB_DISABLE_XET: "1" +``` + +**Extra Command Arguments (YAML list):** +```yaml +- "--block-size" +- "128" +- "--dtype" +- "bfloat16" +- "--distributed_executor_backend" +- "mp" +- "--enable_chunked_prefill" +- "--enforce-eager" +- "--max-model-len" +- "33024" +- "--max-num-batched-tokens" +- "2048" +- "--max-num-seqs" +- "256" +``` + +**Notes:** +Tensor parallelism and pipeline parallelism are determined dynamically based on the deployment configuration: + +```yaml +tensor_parallel_size: "{{ .Values.tensor_parallel_size }}" +pipeline_parallel_size: "{{ .Values.pipeline_parallel_size }}" +``` + +**Result:** +After applying the recommended parameters, model deployment completes successfully and the inference service starts without worker initialization failures. + diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md new file mode 100644 index 00000000..98c4637f --- /dev/null +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md @@ -0,0 +1,301 @@ +# Intel® AI for Enterprise Inference - Ubuntu (APISIX) + +## Table of Contents +- [Overview](#overview) +- [Prerequisites](#prerequisites) + - [1. System Requirements](#1-system-requirements) + - [2. SSH Key Setup](#2-ssh-key-setup) + - [3. DNS and SSL/TLS Setup](#3-dns-and-ssltls-setup) + - [4. Hugging Face Token Setup](#4-hugging-face-token-setup) +- [Single Node Deployment Guide](#single-node-deployment-guide) + - [1. Clone the Repository](#1-clone-the-repository) + - [2. Configure the Setup Files and Environment](#2-configure-the-setup-files-and-environment) + - [3. Run the Deployment](#3-run-the-deployment) + - [4. 
Verify the Deployment](#4-verify-the-deployment) + - [5. Test the Inference](#5-test-the-inference) +- [Troubleshooting](#troubleshooting) +- [Summary](#summary) + +--- + +## Overview +This guide walks you through the setup and deployment of **Intel® AI for Enterprise Inference** in **single-node** environment. +It is designed for new users who may not be familiar with server configuration or AI inference deployment. + +**You’ll Learn How To:** + +- Prepare your system environment +- Set up SSH, DNS, SSL/TLS, and Hugging Face tokens +- Run automated scripts for Intel® Gaudi® accelerators +- Deploy and test the inference stack on a single node + +--- + +## Prerequisites +Before starting the deployment, ensure your system meets the following requirements. + +### 1. System Requirements + +| Requirement | Description | +|--------------|-------------| +| **Operating System** | Ubuntu 22.04 LTS | +| **Access** | Root or sudo privileges | +| **Network** | Internet connection for package installation | +| **Optional Accelerator SW Versions** | Intel® Gaudi® AI Accelerator hardware (for GPU workloads) | +| - **HL-SMI Version (hl)** | ≥1.21.3 | +| - **Firmware Version (fw)** | 61.0.2.0 | +| - **SPI / Preboot Firmware (Gaudi3**) | ≥1.22.0-fw-61.3.2-sec-3 | +| - **Driver Version** | ≥1.21.3-f063886 | +| - **NIC Driver Version** | ≥1.21.3-94c920f | +| - **Habana Container Runtime** | ≥ 1.22.1-6 | + +#### Sudo Setup + +Ensure `sudo` preserves `/usr/local/bin` in the PATH. Execute the following to check that `/usr/local/bin` is in /etc/sudoers `secure_path`: + +```bash +$ sudo cat /etc/sudoers | grep secure_path +Defaults secure_path = /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin +``` + +If you do NOT see `/usr/local/bin`, use `sudo visudo` to edit the sudoers file and append it as you see in the sample output above. + +### 2. SSH Key Setup +SSH keys are required to allow **Ansible** or automation scripts to connect securely to your nodes. + +1. 
**Generate a new SSH key pair:** + ```bash + ssh-keygen -t rsa -b 4096 + ``` + + - Press '**Enter**' to accept defaults. + - You can name your key if desired. + - Leave the password field blank. + +2. **Distribute the public key:** + + Copy the contents of your `id_rsa.pub` file to authorized_keys: + ```bash + echo "" >> ~/.ssh/authorized_keys + ``` + +3. **Verify access:** + + Test SSH connectivity: + ```bash + chmod 600 + ssh -i @ + ``` + +### 3. DNS and SSL/TLS Setup + +1. **Generate a self-signed certificate:** + + Use OpenSSL to generate a temporary certificate: + ```bash + mkdir -p ~/certs && cd ~/certs + openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -days 365 -nodes -subj "/CN=api.example.com" + ``` + + This will generate: + `cert.pem` → certificate + `key.pem` → private key + +2. **Map your DNS to your local IP (only if not registered in DNS):** + + If your domain is not registered in DNS, you can map it manually by editing your /etc/hosts file + ```bash + hostname -I # Get your machine's private IP + sudo nano /etc/hosts + ``` + + Add this line: + ```bash + 127.0.0.1 api.example.com + ``` + + Save and exit with CTRL+X → Y → Enter. + + **Important:** This manual mapping is only required if your machine’s hostname is not resolvable via DNS. + If your domain is already managed by a DNS provider, skip this step. + +### 4. Hugging Face Token Setup + 1. Visit huggingface.com and log in (or create an account). + 2. Go to **Settings → Access** Tokens. + 3. Click “**New Token**”, enter a name, and copy the generated value. + 4. Store it securely — you’ll need it for deployment. + +--- + +## Single Node Deployment Guide +This section explains how to deploy Intel® AI for Enterprise Inference on a single Ubuntu 22.04 server. + +**Prerequisites** +- Ubuntu 22.04 server ready +- Root or sudo access + +--- + +### 1. 
Clone the Repository
+
+```bash
+cd ~
+git clone https://github.com/opea-project/Enterprise-Inference.git
+cd Enterprise-Inference
+git checkout release-1.4.0
+```
+
+---
+
+### 2. Configure the Setup Files and Environment
+
+**Update inference-config.cfg:**
+Update configuration files for single node deployment. Note that changes to the users and passwords in this file will impact the verification step settings in section 4 below.
+
+* Production installations should set your own values
+* Add your Hugging Face token
+* Set the cpu_or_gpu value to "cpu" for Xeon models and "gaudi3" for Intel Gaudi 3 accelerator models
+* Set deploy_keycloak_apisix to on, and set deploy_genai_gateway to off
+
+```bash
+vi core/inventory/inference-config.cfg
+```
+
+Sample default values (insert your token) for a full deployment of the inference stack with Llama-8B model.
+
+```
+cluster_url=api.example.com
+cert_file=~/certs/cert.pem
+key_file=~/certs/key.pem
+keycloak_client_id=api
+keycloak_admin_user=api-admin
+keycloak_admin_password=changeme!!
+hugging_face_token=your_hugging_face_token
+hugging_face_token_falcon3=your_hugging_face_token
+models=1
+cpu_or_gpu=gaudi3
+vault_pass_code=place-holder-123
+deploy_kubernetes_fresh=on
+deploy_ingress_controller=on
+deploy_keycloak_apisix=on
+deploy_genai_gateway=off
+deploy_observability=off
+deploy_llm_models=on
+deploy_ceph=off
+deploy_istio=off
+uninstall_ceph=off
+```
+
+To support non-interactive execution of inference-stack-deploy.sh, create a file named "core/inventory/.become-passfile" with your user's sudo password:
+
+```bash
+vi core/inventory/.become-passfile
+chmod 600 core/inventory/.become-passfile
+```
+**Update hosts.yaml File**
+Copy the single node preset hosts config file to the working directory:
+```bash
+cp -f docs/examples/single-node/hosts.yaml core/inventory/hosts.yaml
+```
+> Note: The ansible_user field is set to ubuntu by default. Change it to the actual username used.
+
+
+### 3. 
Run the Deployment + +Run the setup for Gaudi _(the "models" and "cpu-or-gpu" parameters are only needed if they are not set in inference-config.cfg)_: + +```bash +cd core +chmod +x inference-stack-deploy.sh +./inference-stack-deploy.sh --models "1" --cpu-or-gpu "gaudi3" +``` + +Run the setup for CPU _(the "models" and "cpu-or-gpu" parameters are only needed if they are not set in inference-config.cfg)_: + +```bash +cd core +chmod +x inference-stack-deploy.sh +./inference-stack-deploy.sh --models "21" --cpu-or-gpu "cpu" +``` + +When prompted, choose option **1) Provision Enterprise Inference Cluster** and confirm **Yes** to start installation. +If using Intel® Gaudi® hardware, make sure firmware and drivers are updated before running this script. + +### 4. Verify the Deployment + +Verify Pods Status +```bash +kubectl get pods +``` +Expected States: +- All pods Running +- No CrashLoopBackOff +- No Pending pods + +verify routes +```bash +kubectl get apisixroutes +``` + +--- + +### 5. Test the Inference + +**Environment Setup** +```bash +export CLUSTER_URL=api.example.com +export BASE_URL=https://api.example.com +export KEYCLOAK_REALM=master +export KEYCLOAK_CLIENT_ID=api +export KEYCLOAK_CLIENT_SECRET=$(bash scripts/keycloak-fetch-client-secret.sh api.example.com api-admin 'changeme!!' 
api | awk -F': ' '/Client secret:/ {print $2}') +``` + +**Obtain Access Token** + +```bash +export TOKEN=$(curl -k -X POST $BASE_URL/token \ + -H 'Content-Type: application/x-www-form-urlencoded' \ + -d "grant_type=client_credentials&client_id=${KEYCLOAK_CLIENT_ID}&client_secret=${KEYCLOAK_CLIENT_SECRET}" \ + | jq -r .access_token) +``` + +**Run a test query for Gaudi:** +```bash +curl -k ${BASE_URL}/Llama-3.1-8B-Instruct/v1/completions \ +-X POST \ +-d '{"model": "meta-llama/Llama-3.1-8B-Instruct", "prompt": "What is Deep Learning?", "max_tokens": 25, "temperature": 0}' \ +-H 'Content-Type: application/json' \ +-H "Authorization: Bearer $TOKEN" +``` + +**Run a test query for CPU:** +```bash +curl -k ${BASE_URL}/Llama-3.1-8B-Instruct-vllmcpu/v1/completions \ +-X POST \ +-d '{"model": "meta-llama/Llama-3.1-8B-Instruct", "prompt": "What is Deep Learning?", "max_tokens": 25, "temperature": 0}' \ +-H 'Content-Type: application/json' \ +-H "Authorization: Bearer $TOKEN" +``` + + +If successful, the model will return a completion response. + +--- + +## Troubleshooting + +This document provides common deployment and runtime issues observed during Intel® AI for Enterprise Inference setup — along with step-by-step resolutions. + +[**Troubleshooting Guide**](./troubleshooting.md) + +--- + +## Summary + +**You’ve successfully:** + +- Verified system readiness +- Configured SSH, DNS, and SSL +- Generated your Hugging Face token +- Deployed Intel® AI for Enterprise Inference +- Tested a working model endpoint diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md new file mode 100644 index 00000000..d096ffda --- /dev/null +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md @@ -0,0 +1,262 @@ +# Intel® AI for Enterprise Inference - Ubuntu (Gen AI) + +## Table of Contents +- [Overview](#overview) +- [Prerequisites](#prerequisites) + - [1. 
System Requirements](#1-system-requirements) + - [2. SSH Key Setup](#2-ssh-key-setup) + - [3. DNS and SSL/TLS Setup](#3-dns-and-ssltls-setup) + - [4. Hugging Face Token Setup](#4-hugging-face-token-setup) +- [Single Node Deployment Guide](#single-node-deployment-guide) + - [1. Clone the Repository](#1-clone-the-repository) + - [2. Configure the Setup Files and Environment](#2-configure-the-setup-files-and-environment) + - [3. Run the Deployment](#3-run-the-deployment) + - [4. Verify the Deployment](#4-verify-the-deployment) + - [5. Test the Inference](#5-test-the-inference) +- [Summary](#summary) + +--- + +## Overview +This guide walks you through the setup and deployment of **Intel® AI for Enterprise Inference** in **single-node** environment. +It is designed for new users who may not be familiar with server configuration or AI inference deployment. + +**You’ll Learn How To:** + +- Prepare your system environment +- Set up SSH, DNS, SSL/TLS, and Hugging Face tokens +- Run automated scripts for Intel® Gaudi® accelerators +- Deploy and test the inference stack on a single node + +--- + +## Prerequisites +Before starting the deployment, ensure your system meets the following requirements. + +### 1.System Requirements + +| Requirement | Description | +|--------------|-------------| +| **Operating System** | Ubuntu 22.04 LTS | +| **Access** | Root or sudo privileges | +| **Network** | Internet connection for package installation | +| **Optional Accelerator SW Versions** | Intel® Gaudi® AI Accelerator hardware (for GPU workloads) | +| **HL-SMI Version (hl)** | ≥1.21.3 | +| **Firmware Version (fw)** | 61.0.2.0 | +| **SPI / Preboot Firmware (Gaudi3**) | ≥1.22.0-fw-61.3.2-sec-3 | +| **Driver Version** | ≥1.21.3-f063886 | +| **NIC Driver Version** | ≥1.21.3-94c920f | +| **Habana Container Runtime** | ≥ 1.22.1-6 | +| **Enterprise Inference Version** | release-1.4.0 | + +#### Sudo Setup + +Ensure `sudo` preserves `/usr/local/bin` in the PATH. 
Execute the following to check that /usr/local/bin is in /etc/sudoers `secure_path`: + +```bash +$ sudo cat /etc/sudoers | grep secure_path +Defaults secure_path = /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin +``` + +If you do NOT see `/usr/local/bin`, use `sudo visudo` to edit the sudoers file and append it as you see in the sample output above. + +### 2. SSH Key Setup +SSH keys are required to allow **Ansible** or automation scripts to connect securely to your nodes. + +1. **Generate a new SSH key pair:** + ```bash + ssh-keygen -t rsa -b 4096 + ``` + + - Press '**Enter**' to accept defaults. + - You can name your key if desired. + - Leave the password field blank. + +2. **Distribute the public key:** + + Copy the contents of your `id_rsa.pub` file to authorized_keys: + ```bash + echo "" >> ~/.ssh/authorized_keys + ``` + +3. **Verify access:** + + Test SSH connectivity: + ```bash + chmod 600 + ssh -i @ + ``` + +### 3. DNS and SSL/TLS Setup + +1. **Generate a self-signed certificate:** + + Use OpenSSL to generate a temporary certificate: + ```bash + mkdir -p ~/certs && cd ~/certs + openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -days 365 -nodes -subj "/CN=api.example.com" + ``` + + This will generate: + `cert.pem` → certificate + `key.pem` → private key + +2. **Map your DNS to your local IP (only if not registered in DNS):** + + If your domain is not registered in DNS, you can map it manually by editing your /etc/hosts file + ```bash + hostname -I # Get your machine's private IP + sudo nano /etc/hosts + ``` + + Add this line: + ```bash + 127.0.0.1 api.example.com + ``` + + Save and exit with CTRL+X → Y → Enter. + + **Important:** This manual mapping is only required if your machine’s hostname is not resolvable via DNS. + If your domain is already managed by a DNS provider, skip this step. + +### 4. Hugging Face Token Setup + 1. Visit huggingface.com and log in (or create an account). + 2. Go to **Settings → Access** Tokens. + 3. 
Click “**New Token**”, enter a name, and copy the generated value. + 4. Store it securely — you’ll need it for deployment. + +--- + +## Single Node Deployment Guide +This section explains how to deploy Intel® AI for Enterprise Inference on a single Ubuntu 22.04 server. + +### 1. Clone the Repository + +```bash +git clone https://github.com/opea-project/Enterprise-Inference.git +cd Enterprise-Inference +git checkout release-1.4.0 +``` + +### 2. Configure the Setup Files and Environment + +**Update inference-config.cfg** + +* Production installations should set your own values +* Add your Hugging Face token +* Set the cpu_or_gpu value to "cpu" for Xeon models and "gaudi3" for Intel Gaudi 3 accelerator models +* Set deploy_genai_gateway to on, and make deploy_keycloak_apisix to off + +```bash +vi core/inventory/inference-config.cfg +``` + +``` +cluster_url=api.example.com +cert_file=~/certs/cert.pem +key_file=~/certs/key.pem +keycloak_client_id=api +keycloak_admin_user=api-admin +keycloak_admin_password=changeme!! +hugging_face_token=your_hugging_face_token +hugging_face_token_falcon3=your_hugging_face_token +models= +cpu_or_gpu=gaudi3 +vault_pass_code=place-holder-123 +deploy_kubernetes_fresh=on +deploy_ingress_controller=on +deploy_keycloak_apisix=off +deploy_genai_gateway=on +deploy_observability=on +deploy_llm_models=on +deploy_ceph=off +deploy_istio=off +uninstall_ceph=off +``` +**Update hosts.yaml File** + +Copy the single node preset hosts config file to the working directory: +```bash +cp -f docs/examples/single-node/hosts.yaml core/inventory/hosts.yaml +``` +> Note: The ansible_user field is set to ubuntu by default. Change it to the actual username used. + + +### 3. 
Run the Deployment + +Run the setup for Gaudi _(the "models" and "cpu-or-gpu" parameters are only needed if they are not set in inference-config.cfg)_: + +```bash +cd core +chmod +x inference-stack-deploy.sh +./inference-stack-deploy.sh --models "1" --cpu-or-gpu "gaudi3" +``` + +Run the setup for CPU _(the "models" and "cpu-or-gpu" parameters are only needed if they are not set in inference-config.cfg)_: + +```bash +cd core +chmod +x inference-stack-deploy.sh +./inference-stack-deploy.sh --models "21" --cpu-or-gpu "cpu" +``` + +When prompted, choose option **1) Provision Enterprise Inference Cluster** and confirm **Yes** to start installation. +If using Intel® Gaudi® hardware, make sure firmware and drivers are updated before running this script. + + +### 4. Verify the Deployment +Verify Pods Status +```bash +kubectl get pods +``` +Expected States: +- All pods Running +- No CrashLoopBackOff +- No Pending pods + +### 5. Test the Inference + +```bash +export BASE_URL=https://api.example.com +``` +Reference the litellm_master_key file under core/inventory/metadata/vault.yml for master-key + +**Run a test query for Gaudi:** +```bash +curl -k ${BASE_URL}/Llama-3.1-8B-Instruct/v1/completions \ + -X POST \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer <>" \ + -d '{ + "model": "meta-llama/Llama-3.1-8B-Instruct", + "prompt": "What is Deep Learning?", + "max_tokens": 25, + "temperature": 0 + }' +``` + +**Run a test query for CPU:** +```bash +curl -k ${BASE_URL}/Llama-3.1-8B-Instruct-vllmcpu/v1/completions \ +-X POST \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer <>" \ + -d '{ + "model": "meta-llama/Llama-3.1-8B-Instruct", + "prompt": "What is Deep Learning?", + "max_tokens": 25, + "temperature": 0 + }' +``` + +If successful, the model will return a completion response. 
+ +## Summary + +**You’ve successfully:** + +- Verified system readiness +- Configured SSH, DNS, and SSL +- Generated your Hugging Face token +- Deployed Intel® AI for Enterprise Inference +- Tested a working model endpoint diff --git a/third_party/Dell/ubuntu-22.04/iac/README.md b/third_party/Dell/ubuntu-22.04/iac/README.md new file mode 100644 index 00000000..032d50c5 --- /dev/null +++ b/third_party/Dell/ubuntu-22.04/iac/README.md @@ -0,0 +1,235 @@ +## Bare-Metal Ubuntu Automation for Enterprise Inference (CPU & Gaudi3) + +This repository provides an **end-to-end, bare-metal automation workflow** to install **Ubuntu 22.04.5**, boot it using **Dell iDRAC Redfish Virtual Media**, and deploy the **Enterprise Inference stack** (CPU or Gaudi3) on a **single-node system**. + +The solution cleanly separates: + +- OS installation (ISO + Redfish) +- Boot orchestration (Terraform) +- Post-OS configuration and inference deployment + +It is designed for repeatable, resumable, and operator-friendly deployments. + +--- + +### 1. Mount Ubuntu ISO (iDRAC Redfish) + +**Script:** [iac/mount-iso.sh](./mount-iso.sh) + +This script mounts or unmounts the **Ubuntu 22.04.5 live server ISO** using the **iDRAC Redfish Virtual Media API**. + +- Mount ISO +- Idempotent (skips if already mounted) + +**Required Environment Variables** +```bash +export IDRAC_IP=100.67.x.x +export IDRAC_USER=root +export IDRAC_PASS=calvin +``` +**Mount ISO** +```bash +./mount-iso.sh +``` +--- + +## 2.Boot Ubuntu Installer (Terraform + Redfish) + +**Script:** [iac/main.tf](./main.tf) + +Terraform uses the **Dell Redfish provider** to configure a **one-time boot from Virtual Media (CD)** and **force a reboot**. 
+ +Key Notes +- ISO must already be mounted using mount-iso.sh +- Boot override is set to Once +- Power reset is forced using redfish_power +- Boot mode (UEFI/Legacy) is not configurable on 17G servers + +**Terraform Variables** + +Update your terraform variables in 'terraform.tfvars' + +Example (terraform.tfvars): +```bash +idrac_endpoint = "https://100.67.x.x" +idrac_user = "root" +idrac_password = "calvin" +idrac_ssl_insecure = true + +ubuntu_username = "user" +ubuntu_hostname = "inference-node" +use_dhcp = true +``` + +**Apply Terraform** +```bash +terraform init +terraform apply +``` + +After terraform apply check you IDRAC console, machine will reboot and Ubuntu installer starts automatically from the mounted ISO. +It will prompt for the user inputs during the installation, provide your inputs and wait for installation to be completed. + +--- + +## 3.Post-OS Enterprise Inference Deployment + +Once OS is installed, login to your machine and run the post-os installations below. + +**Script:** [iac/deploy-enterprise-inference.sh](./deploy-enterprise-inference.sh) + +This script performs **all post-OS configuration** and deploys the **Enterprise Inference stack** on a **single node**. + +**Key Features** +- Resume / checkpoint support +- Safe to re-run after failure +- CPU or Gaudi3 support +- Automated configuration of: + - Packages + - Repo clone + branch checkout + - Inventory & config files + - Firmware & kernel tuning (Gaudi3) + - SSH, sudo, certificates + - Final inference stack deployment + +**Required Parameters** +```bash +sudo ./deploy-enterprise-inference.sh \ +-u user \ +-p Linux123! 
\ +-t hf_xxxxxxxxxxxxx \ +-g gaudi3 \ +-m "1" +``` + +| Option | Description | +| -------| ------------ | +| -u | OS username | +| -p | OS userpassword | +| -t | Hugging Face token | +| -g | gaudi3 or cpu | +| -m | Model IDs | +| -b | Repo branch (default: release-1.4.0) | +| -r | Resume from last checkpoint | + +**Resume After Failure** + +The deployment script is resume-safe. If a failure occurs, simply rerun the script with the -r flag: +```bash +sudo ./deploy-enterprise-inference.sh -r +``` + +**State is tracked in:** + +Deployment progress is tracked using a local state file: +```bash +/tmp/ei-deploy.state +``` + +**What the Deployment Script Does** + +- Installs system packages +- Clones Enterprise-Inference repo +- Applies single-node inventory defaults +- Updates inference-config.cfg +- Installs Gaudi3 firmware (if applicable) +- Applies kernel/IOMMU tuning (kernel 6.8) +- Configures SSH and sudo +- Generates SSL certificates +- Runs inference-stack-deploy.sh + +--- + +## Verification & Access + +After a successful deployment, verify the system at three levels: OS, Enterprise Inference services, and model inference. + +**1. OS & System Validation** +Verify the node is healthy and running the expected kernel. +```bash +hostname +uname -r +uptime +``` +Expected: +- Hostname matches ubuntu_hostname +- 5.15.0-164-generic +- System uptime is stable (no reboot loops) + +Verify disk and memory +```bash +df -h +free -h +``` + +**2. Enterprise Inference Services** +Verify all inference services are running. +```bash +kubectl get pods +``` +Expected: +- All services in RUNNING state +- No failed systemd units + +Check systemd services manually if needed: +```bash +systemctl list-units --type=service | grep -i inference +``` + +**3. Gaudi3 Verification (Only if -g gaudi3)** +Confirm Gaudi devices and firmware are detected. 
+```bash +hl-smi +``` +Expected: +- All Gaudi devices visible +- Firmware version matches deployment input + +Verify kernel modules: +```bash +lsmod | grep habanalabs +``` + +**4. API & Networking Validation** +Verify hostname resolution: +```bash +cat /etc/hosts | grep api.example.com +``` +Expected: +- 127.0.0.1 api.example.com + +Verify TLS certificates exist: +```bash +ls -l ~/certs +``` + +Expected: +- cert.pem +- key.pem + + +**5. API Health Check** +Validate the inference gateway is reachable. +```bash +curl -k https://api.example.com/health +``` +Expected: +{"status":"ok"} + +--- + +**6. Test Model Inference** + +if EI is deployed with apisix, follow [Testing EI model with apisix](../EI/single-node/user-guide-apisix.md#5-test-the-inference) for generating token and testing the inference + +if EI is deployed with genai, follow [Testing EI model with genai](../EI/single-node/user-guide-genai.md#5-test-the-inference) for generating api-key and testing the inference + +--- + +## Summary + +This repository provides a clean, deterministic, enterprise-grade deployment pipeline for: + +Bare-metal Ubuntu + Enterprise Inference (CPU/Gaudi3) + + diff --git a/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh b/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh new file mode 100644 index 00000000..43d1668b --- /dev/null +++ b/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh @@ -0,0 +1,638 @@ +#!/bin/bash +# +# Combined Enterprise Inference Stack Deployment Script +# This script combines genai-system-setup.sh, genai-owner-setup.sh, and post-os-setup.sh +# with resume capability to continue from where it failed. +# +# Usage: +# ./deploy-enterprise-inference.sh -u -t [OPTIONS] +# +# Options: +# -u, --username Enterprise Inference owner username (required) +# -t, --token Hugging Face token (required) +# -p, --password User sudo password for Ansible (default: Linux123!) 
+# -g, --gpu-type GPU type: 'gaudi3' or 'cpu' (default: gaudi3) +# -m, --models Model IDs to deploy, comma-separated (default: "5") +# -b, --branch Git branch to clone (default: dell-deploy) +# -f, --firmware-version Firmware version (default: 1.22.1) +# -d, --deployment-mode Deployment mode: 'keycloak' or 'genai' (default: keycloak) +# -o, --observability Enable observability: 'on' or 'off' (default: off) +# -r, --resume Resume from checkpoint (auto-detected if state file exists) +# -s, --state-file State file path (default: /tmp/ei-deploy.state) +# -h, --help Show this help message +# +# Example: +# ./deploy-enterprise-inference.sh -u user -t hf_xxxxxxxxxxxxx -g gaudi3 -m "5" +# ./deploy-enterprise-inference.sh -u user -t hf_xxxxxxxxxxxxx -g cpu -m "1" -d genai -o on +set -euo pipefail + +# Default values + +USERNAME="Replace-with-your-username" +HF_TOKEN="Replace-with-your-hugging face token" +USER_PASSWORD="Replace-with-your-user-password" +GPU_TYPE="Enter gaudi3/cpu based on your deployment" +MODELS="Enter Model number" +DEPLOYMENT_MODE="keycloak" +DEPLOY_OBSERVABILITY="off" +BRANCH="release-1.4.0" +REPO_URL="https://github.com/opea-project/Enterprise-Inference" +KEYCLOAK_CLIENT_ID="api" +KEYCLOAK_ADMIN_USER="api-admin" +KEYCLOAK_ADMIN_PASSWORD="changeme!!" +FIRMWARE_VERSION="1.22.1" +STATE_FILE="/tmp/ei-deploy.state" +RESUME=false + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Logging functions +log_info() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +log_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" >&2 +} + +# Usage function +usage() { + cat << EOF +Usage: $0 -u -t [OPTIONS] + +Required Options: + -u, --username Enterprise Inference owner username + -t, --token Hugging Face token + +Optional Options: + -p, --password User sudo password for Ansible (default: Linux123!) 
+ -g, --gpu-type GPU type: 'gaudi3' or 'cpu' (default: gaudi3) + -m, --models Model IDs to deploy, comma-separated (default: "1") + -b, --branch Git branch to clone (default: dell-deploy) + -f, --firmware-version Firmware version (default: 1.22.1) + -d, --deployment-mode Deployment mode: 'keycloak' or 'genai' (default: keycloak) + -o, --observability Enable observability: 'on' or 'off' (default: off) + -s, --state-file State file path (default: /tmp/ei-deploy.state) + -r, --resume Force resume from checkpoint + -h, --help Show this help message + +Example: + $0 -u user -t hf_xxxxxxxxxxxxx -g gaudi3 -m "1" + $0 -u user -t hf_xxxxxxxxxxxxx -g cpu -m "1" -d genai -o on +EOF + exit 1 +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + -u|--username) + USERNAME="$2" + shift 2 + ;; + -t|--token) + HF_TOKEN="$2" + shift 2 + ;; + -p|--password) + USER_PASSWORD="$2" + shift 2 + ;; + -g|--gpu-type) + GPU_TYPE="$2" + shift 2 + ;; + -m|--models) + MODELS="$2" + shift 2 + ;; + -b|--branch) + BRANCH="$2" + shift 2 + ;; + -f|--firmware-version) + FIRMWARE_VERSION="$2" + shift 2 + ;; + -d|--deployment-mode) + DEPLOYMENT_MODE="$2" + shift 2 + ;; + -o|--observability) + DEPLOY_OBSERVABILITY="$2" + shift 2 + ;; + -s|--state-file) + STATE_FILE="$2" + shift 2 + ;; + -r|--resume) + RESUME=true + shift + ;; + -h|--help) + usage + ;; + *) + log_error "Unknown option: $1" + usage + ;; + esac +done + +# Validate required parameters +if [[ -z "$USERNAME" ]] || [[ -z "$HF_TOKEN" ]]; then + log_error "Username and Hugging Face token are required" + usage +fi + +# Validate GPU type +if [[ "$GPU_TYPE" != "gaudi3" ]] && [[ "$GPU_TYPE" != "cpu" ]]; then + log_error "GPU type must be 'gaudi3' or 'cpu'" + exit 1 +fi + +# Validate deployment mode +if [[ "$DEPLOYMENT_MODE" != "keycloak" ]] && [[ "$DEPLOYMENT_MODE" != "genai" ]]; then + log_error "Deployment mode must be 'keycloak' or 'genai'" + exit 1 +fi + +# Validate observability setting +if [[ "$DEPLOY_OBSERVABILITY" != 
"on" ]] && [[ "$DEPLOY_OBSERVABILITY" != "off" ]]; then + log_error "Observability must be 'on' or 'off'" + exit 1 +fi + +# Set deployment variables based on deployment mode +set_deployment_variables() { + case "$DEPLOYMENT_MODE" in + keycloak) + DEPLOY_KEYCLOAK_APISIX="on" + DEPLOY_GENAI_GATEWAY="off" + ;; + genai) + DEPLOY_KEYCLOAK_APISIX="off" + DEPLOY_GENAI_GATEWAY="on" + ;; + esac +} + +# Initialize deployment variables +set_deployment_variables + +# Check if running with root/sudo privileges +if [[ $EUID -ne 0 ]]; then + log_error "This script must be run with sudo privileges" + log_error "Please run: sudo $0 $*" + exit 1 +fi + +INVOKING_USER="${SUDO_USER:-$(whoami)}" + +# never allow root deployment +if [[ "$INVOKING_USER" == "root" ]]; then + log_error "Refusing to deploy as root" + exit 1 +fi + +# If USERNAME was passed, it must match invoking user +if [[ -n "${USERNAME:-}" && "$USERNAME" != "$INVOKING_USER" ]]; then + log_error "Username mismatch detected" + log_error "Invoking user : $INVOKING_USER" + log_error "Provided user : $USERNAME" + log_error "Deployment user must match the invoking user" + exit 1 +fi + +# Final deployment user (single source of truth) +USERNAME="$INVOKING_USER" + +log_info "Deployment user validated: $USERNAME" + +# Check if state file exists (for resume) +if [[ -f "$STATE_FILE" ]] || [[ "$RESUME" == true ]]; then + if [[ -f "$STATE_FILE" ]]; then + log_info "State file found. Resuming from checkpoint..." + source "$STATE_FILE" + RESUME=true + # Re-apply deployment variables based on saved deployment mode + set_deployment_variables + else + log_warn "Resume requested but no state file found. Starting fresh." 
+ RESUME=false + fi +else + RESUME=false +fi + +# State management functions +save_state() { + local step=$1 + cat > "$STATE_FILE" << EOF +# Enterprise Inference Deployment State +# Do not edit this file manually +LAST_COMPLETED_STEP="$step" +USERNAME="$USERNAME" +HF_TOKEN="$HF_TOKEN" +USER_PASSWORD="$USER_PASSWORD" +GPU_TYPE="$GPU_TYPE" +MODELS="$MODELS" +BRANCH="$BRANCH" +DEPLOYMENT_MODE="$DEPLOYMENT_MODE" +REPO_URL="$REPO_URL" +FIRMWARE_VERSION="$FIRMWARE_VERSION" +DEPLOY_OBSERVABILITY="$DEPLOY_OBSERVABILITY" +EOF + log_info "Checkpoint saved: $step" +} + +# Define step order for resume logic +declare -A STEP_ORDER=( + ["system_packages"]=1 + ["clone_repo"]=2 + ["firmware"]=3 + ["kernel_config"]=4 + ["hosts_file"]=5 + ["ssh_setup"]=6 + ["certificates"]=7 + ["deploy_stack"]=8 +) + +check_step() { + local step=$1 + # If not resuming, always run the step + if [[ "$RESUME" != true ]] || [[ -z "${LAST_COMPLETED_STEP:-}" ]]; then + return 1 # Step not completed, should run + fi + + # Get order of current step and last completed step + local current_order=${STEP_ORDER[$step]:-999} + local last_order=${STEP_ORDER[$LAST_COMPLETED_STEP]:-0} + + # If current step order is <= last completed step order, it was already completed + if [[ $current_order -le $last_order ]]; then + return 0 # Step completed, should skip + else + return 1 # Step not completed, should run + fi +} + +skip_if_completed() { + local step=$1 + if check_step "$step"; then + log_info "Step '$step' already completed. Skipping..." + return 0 + fi + return 1 +} + +log_warn "Have you verified that your Hugging Face token has access to the model(s): ${MODELS} ?" +if [[ -t 0 ]]; then + read -r -p "Type 'yes' to continue or anything else to exit: " HF_CONFIRM +else + log_warn "Non-interactive shell detected, auto-continuing Hugging Face check" + HF_CONFIRM="yes" +fi + +if [[ "${HF_CONFIRM}" != "yes" ]]; then + log_error "Please verify Hugging Face model access before running the deployment." 
+ exit 1 +fi + +# Main deployment steps +main() { + log_info "==========================================" + log_info "Enterprise Inference Stack Deployment" + log_info "==========================================" + log_info "Username: $USERNAME" + log_info "GPU Type: $GPU_TYPE" + log_info "Models: $MODELS" + log_info "Branch: $BRANCH" + log_info "Deployment Mode: $DEPLOYMENT_MODE" + log_info " - Keycloak + APISIX: $DEPLOY_KEYCLOAK_APISIX" + log_info " - GenAI Gateway: $DEPLOY_GENAI_GATEWAY" + log_info " - Observability: $DEPLOY_OBSERVABILITY" + log_info "State File: $STATE_FILE" + log_info "Resume Mode: $RESUME" + log_info "==========================================" + echo "" + + # Step 1: Install system packages + if ! skip_if_completed "system_packages"; then + log_info "Step 1: Installing system packages..." + apt-get update + apt-get install -y git openssl curl + log_success "System packages installed" + save_state "system_packages" + fi + + # Step 2: Clone Enterprise Inference repository + if ! skip_if_completed "clone_repo"; then + log_info "Step 2: Cloning Enterprise Inference repository..." + log_info "Repository: ${REPO_URL}" + log_info "Branch: ${BRANCH}" + if [[ -d "/home/${USERNAME}/Enterprise-Inference" ]]; then + log_warn "Enterprise-Inference directory already exists. Skipping clone..." + else + cd "/home/${USERNAME}" + su "${USERNAME}" -c "git clone ${REPO_URL}" + su "${USERNAME}" -c "cd /home/${USERNAME}/Enterprise-Inference && git checkout ${BRANCH}" + log_success "Repository cloned" + fi + + log_info "Applying single-node inventory defaults..." 
+ SRC_BASE="/home/${USERNAME}/Enterprise-Inference/docs/examples/single-node" + DEST_BASE="/home/${USERNAME}/Enterprise-Inference/core/inventory" + + if [[ -d "$SRC_BASE" ]] && [[ -d "$DEST_BASE" ]]; then + cp -f "${SRC_BASE}/hosts.yaml" "${DEST_BASE}/hosts.yaml" + chown "${USERNAME}:${USERNAME}" "${DEST_BASE}/hosts.yaml" + log_success "Single-node hosts.yaml applied" + else + log_warn "Single-node example for hosts not found, skipping copy" + fi + + # Create .become-passfile for Ansible (empty since we configure NOPASSWD) + log_info "Creating Ansible become-passfile..." + INVENTORY_DIR="/home/${USERNAME}/Enterprise-Inference/core/inventory" + + if [[ -d "$INVENTORY_DIR" ]]; then + BECOME_PASSFILE="${INVENTORY_DIR}/.become-passfile" + # Create passfile with user password for Ansible + echo "${USER_PASSWORD}" > "$BECOME_PASSFILE" + chown "${USERNAME}:${USERNAME}" "$BECOME_PASSFILE" + chmod 600 "$BECOME_PASSFILE" + log_success "Ansible become-passfile created at ${BECOME_PASSFILE}" + else + log_warn "Inventory directory not found at ${INVENTORY_DIR}, will create later" + fi + + HOSTS_FILE="/home/${USERNAME}/Enterprise-Inference/core/inventory/hosts.yaml" + + if [[ -f "$HOSTS_FILE" ]]; then + log_info "Updating ansible_user in hosts.yaml to '${USERNAME}'" + + sed -i -E "/^[[:space:]]*master1:/,/^[[:space:]]{2}children:/ s/^([[:space:]]*ansible_user:[[:space:]]*).*/\1${USERNAME}/" "$HOSTS_FILE" + else + log_warn "hosts.yaml not found at ${HOSTS_FILE}, skipping ansible_user update" + fi + + if [[ "$GPU_TYPE" == "cpu" ]] || [[ "$GPU_TYPE" == "gaudi3" ]]; then + CONFIG_FILE="/home/${USERNAME}/Enterprise-Inference/core/inventory/inference-config.cfg" + if [[ -f "$CONFIG_FILE" ]]; then + sed -i -E \ + -e 's/^[[:space:]]*hugging_face_token[[:space:]]*=.*/hugging_face_token='${HF_TOKEN}'/' \ + -e 's/^[[:space:]]*models[[:space:]]*=.*/models='${MODELS}'/' \ + -e 's/^[[:space:]]*cpu_or_gpu[[:space:]]*=.*/cpu_or_gpu='${GPU_TYPE}'/' \ + -e 
's/^[[:space:]]*keycloak_client_id[[:space:]]*=.*/keycloak_client_id='${KEYCLOAK_CLIENT_ID}'/' \ + -e 's/^[[:space:]]*keycloak_admin_user[[:space:]]*=.*/keycloak_admin_user='${KEYCLOAK_ADMIN_USER}'/' \ + -e 's/^[[:space:]]*keycloak_admin_password[[:space:]]*=.*/keycloak_admin_password='${KEYCLOAK_ADMIN_PASSWORD}'/' \ + -e 's/^[[:space:]]*deploy_keycloak_apisix[[:space:]]*=.*/deploy_keycloak_apisix='${DEPLOY_KEYCLOAK_APISIX}'/' \ + -e 's/^[[:space:]]*deploy_genai_gateway[[:space:]]*=.*/deploy_genai_gateway='${DEPLOY_GENAI_GATEWAY}'/' \ + -e 's/^[[:space:]]*deploy_observability[[:space:]]*=.*/deploy_observability='${DEPLOY_OBSERVABILITY}'/' \ + "$CONFIG_FILE" + log_info "Updated inference-config.cfg with models='${MODELS}' and cpu_or_gpu=cpu" + else + log_warn "inference-config.cfg not found at $CONFIG_FILE, skipping update." + fi + fi + # ------------------------------------------------------------ + # Disable NRI explicitly for CPU-only deployments + # ------------------------------------------------------------ + if [[ "$GPU_TYPE" == "cpu" ]]; then + log_info "CPU-only mode detected — disabling NRI and CPU balloons" + + # Update if keys exist + sed -i -E \ + -e 's/^[[:space:]]*enable_nri[[:space:]]*=.*/enable_nri=false/' \ + -e 's/^[[:space:]]*enable_cpu_balloons[[:space:]]*=.*/enable_cpu_balloons=false/' \ + "$CONFIG_FILE" || true + + # Append if keys do not exist + grep -q '^enable_nri=' "$CONFIG_FILE" || echo 'enable_nri=false' >> "$CONFIG_FILE" + grep -q '^enable_cpu_balloons=' "$CONFIG_FILE" || echo 'enable_cpu_balloons=false' >> "$CONFIG_FILE" + + log_success "NRI disabled for CPU-only deployment" + fi + save_state "clone_repo" + fi + + # Step 3: Install Gaudi3 firmware (only for gaudi3) + if [[ "$GPU_TYPE" == "gaudi3" ]]; then + if ! skip_if_completed "firmware"; then + log_info "Step 3: Installing Gaudi3 firmware..." 
+ cd "/home/${USERNAME}/Enterprise-Inference/core" + if [[ -f "scripts/firmware-update.sh" ]]; then + chmod u+x scripts/firmware-update.sh + scripts/firmware-update.sh "${FIRMWARE_VERSION}" --force || { + log_warn "Firmware update may have failed, continuing..." + } + log_success "Firmware installation completed" + else + log_warn "Firmware update script not found, skipping..." + fi + save_state "firmware" + fi + else + log_info "Step 3: Skipping firmware (CPU mode)" + save_state "firmware" + fi + + # Step 4: Kernel configuration for kernel 6.8 (only for gaudi3) + if [[ "$GPU_TYPE" == "gaudi3" ]]; then + if ! skip_if_completed "kernel_config"; then + log_info "Step 4: Checking kernel version and configuring IOMMU if needed..." + KERNEL=$(uname -r) + if [[ "$KERNEL" == 6.8.* ]]; then + log_info "Kernel version 6.8 detected. Adding IOMMU configuration..." + if ! grep -q "iommu=pt intel_iommu=on" /etc/default/grub; then + echo "" >> /etc/default/grub + echo "# Gaudi3 requires this option for kernel version 6.8" >> /etc/default/grub + echo 'GRUB_CMDLINE_LINUX_DEFAULT="iommu=pt intel_iommu=on"' >> /etc/default/grub + log_warn "IOMMU configuration added. System restart required after deployment." + else + log_info "IOMMU configuration already present" + fi + else + log_info "Kernel version $KERNEL - no special configuration needed" + fi + save_state "kernel_config" + fi + else + log_info "Step 4: Skipping kernel config (CPU mode)" + save_state "kernel_config" + fi + + # Step 5: Add hostname to /etc/hosts + if ! skip_if_completed "hosts_file"; then + log_info "Step 5: Adding hostname to /etc/hosts..." + if ! grep -q "api.example.com" /etc/hosts; then + echo "" >> /etc/hosts + echo "127.0.0.1 api.example.com" >> /etc/hosts + log_success "Hostname added to /etc/hosts" + else + log_info "Hostname already in /etc/hosts" + fi + save_state "hosts_file" + fi + + # Step 6: Setup SSH keys + if ! skip_if_completed "ssh_setup"; then + log_info "Step 6: Setting up SSH keys..." 
+ cd "/home/${USERNAME}" + + # Create .ssh directory if it doesn't exist + su "${USERNAME}" -c "mkdir -p .ssh" + + # Generate SSH key if it doesn't exist + if [[ ! -f "/home/${USERNAME}/.ssh/id_rsa" ]]; then + su "${USERNAME}" -c "ssh-keygen -t rsa -b 4096 -f /home/${USERNAME}/.ssh/id_rsa -N '' -q" + log_info "SSH key generated" + else + log_info "SSH key already exists" + fi + + # Add public key to authorized_keys + if [[ -f "/home/${USERNAME}/.ssh/id_rsa.pub" ]]; then + PUB_KEY=$(cat "/home/${USERNAME}/.ssh/id_rsa.pub") + if ! grep -q "$PUB_KEY" "/home/${USERNAME}/.ssh/authorized_keys" 2>/dev/null; then + cat "/home/${USERNAME}/.ssh/id_rsa.pub" >> "/home/${USERNAME}/.ssh/authorized_keys" + log_info "Public key added to authorized_keys" + fi + fi + + # Add to known_hosts + su "${USERNAME}" -c "ssh-keyscan -H localhost >> /home/${USERNAME}/.ssh/known_hosts 2>/dev/null || true" + su "${USERNAME}" -c "ssh-keyscan -H 127.0.0.1 >> /home/${USERNAME}/.ssh/known_hosts 2>/dev/null || true" + + # Set proper permissions + chown -R "${USERNAME}:${USERNAME}" "/home/${USERNAME}/.ssh" + chmod 700 "/home/${USERNAME}/.ssh" + chmod 600 "/home/${USERNAME}/.ssh/id_rsa" 2>/dev/null || true + chmod 644 "/home/${USERNAME}/.ssh/id_rsa.pub" 2>/dev/null || true + chmod 600 "/home/${USERNAME}/.ssh/authorized_keys" 2>/dev/null || true + + log_success "SSH setup completed" + save_state "ssh_setup" + fi + + # Step 7: Create SSL certificates + if ! skip_if_completed "certificates"; then + log_info "Step 7: Creating SSL certificates..." + cd "/home/${USERNAME}" + su "${USERNAME}" -c "mkdir -p certs" + cd certs + + if [[ ! -f "cert.pem" ]] || [[ ! -f "key.pem" ]]; then + su "${USERNAME}" -c "openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -days 365 -nodes -subj '/CN=api.example.com'" + log_success "SSL certificates created" + else + log_info "SSL certificates already exist" + fi + save_state "certificates" + fi + + # Step 8: Deploy Enterprise Inference Stack + if ! 
skip_if_completed "deploy_stack"; then + log_info "Step 8: Deploying Enterprise Inference Stack..." + cd "/home/${USERNAME}/Enterprise-Inference/core" + + if [[ ! -f "inference-stack-deploy.sh" ]]; then + log_error "inference-stack-deploy.sh not found!" + exit 1 + fi + + chmod +x inference-stack-deploy.sh + + # Configure sudo NOPASSWD for the user (required for Ansible) + log_info "Configuring sudo NOPASSWD for user ${USERNAME}..." + if ! grep -q "^${USERNAME}.*NOPASSWD" /etc/sudoers 2>/dev/null; then + echo "${USERNAME} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers + log_success "Sudo NOPASSWD configured for ${USERNAME}" + else + log_info "Sudo NOPASSWD already configured for ${USERNAME}" + fi + + # Ensure .become-passfile exists (created in Step 2, but verify here) + log_info "Verifying Ansible become-passfile..." + INVENTORY_DIR="/home/${USERNAME}/Enterprise-Inference/core/inventory" + if [[ ! -d "$INVENTORY_DIR" ]]; then + # Try alternative spelling (typo in docs) + INVENTORY_DIR="/home/${USERNAME}/Enterprise-Inference/core/inentory" + fi + + if [[ -d "$INVENTORY_DIR" ]]; then + BECOME_PASSFILE="${INVENTORY_DIR}/.become-passfile" + if [[ ! -f "$BECOME_PASSFILE" ]]; then + # Create passfile with user password for Ansible + echo "${USER_PASSWORD}" > "$BECOME_PASSFILE" + chown "${USERNAME}:${USERNAME}" "$BECOME_PASSFILE" + chmod 600 "$BECOME_PASSFILE" + log_info "Ansible become-passfile created" + else + # Update passfile with current password + echo "${USER_PASSWORD}" > "$BECOME_PASSFILE" + chown "${USERNAME}:${USERNAME}" "$BECOME_PASSFILE" + chmod 600 "$BECOME_PASSFILE" + log_info "Ansible become-passfile updated" + fi + else + log_warn "Inventory directory not found at ${INVENTORY_DIR}" + fi + + # Export Hugging Face token + export HUGGINGFACE_TOKEN="${HF_TOKEN}" + + log_info "Running inference-stack-deploy.sh..." 
+ log_info "Parameters: --models '${MODELS}' --cpu-or-gpu '${GPU_TYPE}' --hugging-face-token " + + # Run the deployment script + # Using echo to provide input: "1" for "Provision Enterprise Inference Cluster", "yes" for confirmation + su "${USERNAME}" -c "cd /home/${USERNAME}/Enterprise-Inference/core && echo -e '1\nyes' | bash ./inference-stack-deploy.sh --models '${MODELS}' --cpu-or-gpu '${GPU_TYPE}' --hugging-face-token ${HUGGINGFACE_TOKEN}" || { + log_error "Enterprise Inference Stack deployment failed!" + log_warn "You can resume by running this script again with -r flag" + exit 1 + } + + log_success "Enterprise Inference Stack deployed successfully!" + save_state "deploy_stack" + fi + + # Cleanup state file on successful completion + if [[ -f "$STATE_FILE" ]]; then + rm -f "$STATE_FILE" + log_info "State file cleaned up" + fi + + log_success "==========================================" + log_success "Deployment completed successfully!" + log_success "==========================================" + log_info "" + log_info "Next steps:" + log_info "1. If kernel configuration was changed, reboot the system" + log_info "2. Verify the deployment using the verification commands" + log_info "3. 
Test the inference endpoints" + log_info "" + log_info "Deployment Summary:" + log_info " - Mode: ${DEPLOYMENT_MODE}" + log_info " - GPU Type: ${GPU_TYPE}" + log_info " - Models: ${MODELS}" + log_info " - Observability: ${DEPLOY_OBSERVABILITY}" +} + +# Run main function +main "$@" diff --git a/third_party/Dell/ubuntu-22.04/iac/main.tf b/third_party/Dell/ubuntu-22.04/iac/main.tf new file mode 100644 index 00000000..e16e5567 --- /dev/null +++ b/third_party/Dell/ubuntu-22.04/iac/main.tf @@ -0,0 +1,118 @@ +terraform { + required_providers { + redfish = { + source = "dell/redfish" + version = "1.6.0" + } + null = { + source = "hashicorp/null" + version = "~> 3.0" + } + } +} + +provider "redfish" { + redfish_servers = { + server1 = { + user = var.idrac_user + password = var.idrac_password + endpoint = var.idrac_endpoint + ssl_insecure = var.idrac_ssl_insecure + } + } +} + +resource "redfish_boot_source_override" "boot_from_virtual_media" { + redfish_server { + redfish_alias = "server1" + } + + system_id = "System.Embedded.1" + + boot_source_override_enabled = "Once" + boot_source_override_target = "Cd" + # boot_source_override_mode not supported on 17G servers + # Note: reset_type is required but may not always trigger reboot reliably + # The redfish_power resource below ensures the reboot happens + reset_type = "ForceRestart" + + lifecycle { + # Allow the resource to be replaced/updated when configuration changes + create_before_destroy = false + } +} + +# Wait a few seconds after boot override is set to ensure ISO is ready +# This also acts as a trigger to force the power resource to apply +# To force a reboot, change any value in the triggers (e.g., add a comment with timestamp) +resource "null_resource" "boot_override_trigger" { + depends_on = [redfish_boot_source_override.boot_from_virtual_media] + + provisioner "local-exec" { + command = "echo 'Waiting 5 seconds for ISO to be fully ready before reboot...' 
&& sleep 5" + } + + triggers = { + boot_target = redfish_boot_source_override.boot_from_virtual_media.boot_source_override_target + boot_enabled = redfish_boot_source_override.boot_from_virtual_media.boot_source_override_enabled + boot_override_id = redfish_boot_source_override.boot_from_virtual_media.id + # To force reboot: uncomment and change the timestamp below, or run: terraform taint redfish_power.reboot_for_install + # force_reboot = "2024-01-01T00:00:00Z" + } +} + +resource "redfish_power" "reboot_for_install" { + redfish_server { + redfish_alias = "server1" + } + + system_id = "System.Embedded.1" + + desired_power_action = "ForceRestart" + maximum_wait_time = 120 + + depends_on = [ + redfish_boot_source_override.boot_from_virtual_media, + null_resource.boot_override_trigger + ] + + lifecycle { + # Force replacement when boot override changes to ensure reboot is applied + replace_triggered_by = [ + null_resource.boot_override_trigger + ] + } +} + +output "installation_ready" { + value = { + boot_configured = redfish_boot_source_override.boot_from_virtual_media.boot_source_override_target == "Cd" + boot_override_type = redfish_boot_source_override.boot_from_virtual_media.boot_source_override_enabled + boot_target = redfish_boot_source_override.boot_from_virtual_media.boot_source_override_target + iso_url = "https://releases.ubuntu.com/22.04/ubuntu-22.04.5-live-server-amd64.iso" + ready_to_reboot = redfish_boot_source_override.boot_from_virtual_media.boot_source_override_target == "Cd" + note = "Run ./mount-iso.sh before terraform apply. Boot mode (UEFI/Legacy) not configurable via Terraform on 17G servers." + } + description = "Ubuntu installation readiness status" +} + +output "installation_config" { + value = { + username = var.ubuntu_username + hostname = var.ubuntu_hostname + use_dhcp = var.use_dhcp + static_ip = var.use_dhcp ? 
"N/A (DHCP)" : var.static_ip + } + description = "Ubuntu installation configuration (password is sensitive)" +} + +output "verification_commands" { + value = { + check_hostname = "curl -sk -u : ${var.idrac_endpoint}/redfish/v1/Systems/System.Embedded.1 | jq -r '.HostName'" + check_boot = "curl -sk -u : ${var.idrac_endpoint}/redfish/v1/Systems/System.Embedded.1/Boot | jq -r '.BootSourceOverrideEnabled'" + idrac_console = "${var.idrac_endpoint} (login: /, then open Virtual Console)" + mount_iso_script = "./mount-iso.sh" + run_script = "./verify-installation.sh" + } + description = "Commands and methods to verify Ubuntu installation (replace and with actual values)" +} diff --git a/third_party/Dell/ubuntu-22.04/iac/mount-iso.sh b/third_party/Dell/ubuntu-22.04/iac/mount-iso.sh new file mode 100644 index 00000000..b7ab76f8 --- /dev/null +++ b/third_party/Dell/ubuntu-22.04/iac/mount-iso.sh @@ -0,0 +1,188 @@ +#!/bin/bash +# Script to mount/unmount Ubuntu ISO via iDRAC Redfish API +# Usage: +# ./mount-iso.sh - Mount the ISO +# ./mount-iso.sh --unmount - Unmount the ISO +# ./mount-iso.sh -u - Unmount the ISO +# +# Environment variables: +# IDRAC_IP or IDRAC_HOST - iDRAC IP address or hostname (required) +# IDRAC_USER or IDRAC_USERNAME - iDRAC username (required) +# IDRAC_PASS or IDRAC_PASSWORD - iDRAC password (required) + +set -e + +# Parse command line arguments +UNMOUNT=false +if [ "$1" = "--unmount" ] || [ "$1" = "-u" ]; then + UNMOUNT=true +fi + +# Read from environment variables with fallback options +IDRAC_IP="${IDRAC_IP:-${IDRAC_HOST}}" +IDRAC_USER="${IDRAC_USER:-${IDRAC_USERNAME}}" +IDRAC_PASS="${IDRAC_PASS:-${IDRAC_PASSWORD}}" + +# Validate required environment variables +if [ -z "$IDRAC_IP" ]; then + echo "❌ Error: IDRAC_IP or IDRAC_HOST environment variable is required" + echo " Example: export IDRAC_IP=100.67.153.16" + exit 1 +fi + +if [ -z "$IDRAC_USER" ]; then + echo "❌ Error: IDRAC_USER or IDRAC_USERNAME environment variable is required" + echo " Example: 
export IDRAC_USER=root" + exit 1 +fi + +if [ -z "$IDRAC_PASS" ]; then + echo "❌ Error: IDRAC_PASS or IDRAC_PASSWORD environment variable is required" + echo " Example: export IDRAC_PASS=calvin" + exit 1 +fi + +ISO_URL="https://releases.ubuntu.com/22.04/ubuntu-22.04.5-live-server-amd64.iso" +SYSTEM_ID="System.Embedded.1" +VIRTUAL_MEDIA_SLOT="1" + +if [ "$UNMOUNT" = true ]; then + echo "==========================================" + echo "Unmounting Virtual Media via iDRAC Redfish API" + echo "==========================================" + echo "" + + # Check current status + echo "Checking current virtual media status..." + CURRENT_IMAGE=$(curl -sk --max-time 10 --connect-timeout 5 -u "${IDRAC_USER}:${IDRAC_PASS}" \ + "https://${IDRAC_IP}/redfish/v1/Systems/${SYSTEM_ID}/VirtualMedia/${VIRTUAL_MEDIA_SLOT}" \ + 2>/dev/null | python3 -c "import sys, json; data=json.load(sys.stdin); print(data.get('Image', 'None'))" 2>/dev/null || echo "None") + + if [ "$CURRENT_IMAGE" = "None" ] || [ "$CURRENT_IMAGE" = "null" ] || [ -z "$CURRENT_IMAGE" ]; then + echo "ℹ️ No media is currently mounted" + exit 0 + fi + + echo "Current mounted image: $CURRENT_IMAGE" + echo "" + echo "Unmounting media..." + + RESPONSE=$(curl -sk --max-time 10 --connect-timeout 5 -w "\n%{http_code}" -u "${IDRAC_USER}:${IDRAC_PASS}" \ + -X POST \ + "https://${IDRAC_IP}/redfish/v1/Systems/${SYSTEM_ID}/VirtualMedia/${VIRTUAL_MEDIA_SLOT}/Actions/VirtualMedia.EjectMedia" \ + -H "Content-Type: application/json" \ + -d '{}' 2>&1) + + HTTP_CODE=$(echo "$RESPONSE" | tail -n1) + BODY=$(echo "$RESPONSE" | sed '$d') + + if [[ "$HTTP_CODE" =~ ^(200|202|204)$ ]]; then + echo "✅ Media unmounted successfully!" + echo "" + echo "Verifying unmount..." 
+ sleep 2 + + VERIFY_IMAGE=$(curl -sk --max-time 10 --connect-timeout 5 -u "${IDRAC_USER}:${IDRAC_PASS}" \ + "https://${IDRAC_IP}/redfish/v1/Systems/${SYSTEM_ID}/VirtualMedia/${VIRTUAL_MEDIA_SLOT}" \ + 2>/dev/null | python3 -c "import sys, json; data=json.load(sys.stdin); print(data.get('Image', 'None'))" 2>/dev/null || echo "None") + + if [ "$VERIFY_IMAGE" = "None" ] || [ "$VERIFY_IMAGE" = "null" ]; then + echo " ✅ Confirmed: No media mounted" + exit 0 + else + echo " ⚠️ Media may still be mounted: $VERIFY_IMAGE" + exit 1 + fi + else + echo "❌ Failed to unmount media. HTTP Code: $HTTP_CODE" + echo "Response: $BODY" + exit 1 + fi +fi + +echo "==========================================" +echo "Mounting Ubuntu ISO via iDRAC Redfish API" +echo "==========================================" +echo "" + +# Check if ISO is already mounted +echo "Checking current virtual media status..." +CURRENT_IMAGE=$(curl -sk --max-time 10 --connect-timeout 5 -u "${IDRAC_USER}:${IDRAC_PASS}" \ + "https://${IDRAC_IP}/redfish/v1/Systems/${SYSTEM_ID}/VirtualMedia/${VIRTUAL_MEDIA_SLOT}" \ + 2>/dev/null | python3 -c "import sys, json; data=json.load(sys.stdin); print(data.get('Image', 'None'))" 2>/dev/null || echo "None") + +if [ "$CURRENT_IMAGE" = "$ISO_URL" ]; then + echo "✅ ISO is already mounted: $ISO_URL" + echo " ConnectedVia: $(curl -sk -u "${IDRAC_USER}:${IDRAC_PASS}" "https://${IDRAC_IP}/redfish/v1/Systems/${SYSTEM_ID}/VirtualMedia/${VIRTUAL_MEDIA_SLOT}" 2>/dev/null | python3 -c "import sys, json; print(json.load(sys.stdin).get('ConnectedVia', 'N/A'))" 2>/dev/null || echo "N/A")" + echo "" + echo "Skipping mount - ISO already in place." 
+ exit 0 +fi + +# Eject existing media if any +if [ "$CURRENT_IMAGE" != "None" ] && [ "$CURRENT_IMAGE" != "null" ]; then + echo "⚠️ Ejecting existing media: $CURRENT_IMAGE" + curl -sk --max-time 10 --connect-timeout 5 -u "${IDRAC_USER}:${IDRAC_PASS}" \ + -X POST \ + "https://${IDRAC_IP}/redfish/v1/Systems/${SYSTEM_ID}/VirtualMedia/${VIRTUAL_MEDIA_SLOT}/Actions/VirtualMedia.EjectMedia" \ + -H "Content-Type: application/json" \ + -d '{}' \ + > /dev/null 2>&1 || echo " (Eject may have failed, continuing anyway...)" + sleep 2 +fi + +if [[ "$ISO_URL" =~ ^https:// ]]; then + TRANSFER_PROTOCOL="HTTPS" +elif [[ "$ISO_URL" =~ ^http:// ]]; then + TRANSFER_PROTOCOL="HTTP" +else + echo "❌ Unsupported ISO URL scheme: $ISO_URL" + exit 1 +fi + +# Mount the ISO +echo "Mounting ISO: $ISO_URL" +echo "" + +RESPONSE=$(curl -sk --max-time 30 --connect-timeout 10 -w "\n%{http_code}" -u "${IDRAC_USER}:${IDRAC_PASS}" \ + -X POST \ + "https://${IDRAC_IP}/redfish/v1/Systems/${SYSTEM_ID}/VirtualMedia/${VIRTUAL_MEDIA_SLOT}/Actions/VirtualMedia.InsertMedia" \ + -H "Content-Type: application/json" \ + -d "{ + \"Image\": \"${ISO_URL}\", + \"TransferMethod\": \"Stream\", + \"TransferProtocolType\": \"${TRANSFER_PROTOCOL}\" + }") + +HTTP_CODE=$(echo "$RESPONSE" | tail -n1) +BODY=$(echo "$RESPONSE" | sed '$d') + +if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "202" ] || [ "$HTTP_CODE" = "204" ]; then + echo "✅ ISO mounted successfully!" + echo "" + echo "Verifying mount..." 
+ sleep 3 + + MOUNTED_IMAGE=$(curl -sk --max-time 10 --connect-timeout 5 -u "${IDRAC_USER}:${IDRAC_PASS}" \ + "https://${IDRAC_IP}/redfish/v1/Systems/${SYSTEM_ID}/VirtualMedia/${VIRTUAL_MEDIA_SLOT}" \ + 2>/dev/null | python3 -c "import sys, json; data=json.load(sys.stdin); print(data.get('Image', 'None'))" 2>/dev/null || echo "None") + + CONNECTED_VIA=$(curl -sk --max-time 10 --connect-timeout 5 -u "${IDRAC_USER}:${IDRAC_PASS}" \ + "https://${IDRAC_IP}/redfish/v1/Systems/${SYSTEM_ID}/VirtualMedia/${VIRTUAL_MEDIA_SLOT}" \ + 2>/dev/null | python3 -c "import sys, json; data=json.load(sys.stdin); print(data.get('ConnectedVia', 'N/A'))" 2>/dev/null || echo "N/A") + + if [ "$MOUNTED_IMAGE" = "$ISO_URL" ]; then + echo " Image: $MOUNTED_IMAGE" + echo " ConnectedVia: $CONNECTED_VIA" + echo "" + echo "✅ Ready for installation!" + exit 0 + else + echo "⚠️ Mount verification failed. Image: $MOUNTED_IMAGE" + exit 1 + fi +else + echo "❌ Failed to mount ISO. HTTP Code: $HTTP_CODE" + echo "Response: $BODY" + exit 1 +fi diff --git a/third_party/Dell/ubuntu-22.04/iac/terraform.tfvars b/third_party/Dell/ubuntu-22.04/iac/terraform.tfvars new file mode 100644 index 00000000..730cb8cf --- /dev/null +++ b/third_party/Dell/ubuntu-22.04/iac/terraform.tfvars @@ -0,0 +1,6 @@ +idrac_endpoint = "replace endpoint" +idrac_user = "replace idrac username" +idrac_password = "replace idrac password" +idrac_ssl_insecure = true +ubuntu_username = "provide username for ubunutu machine" +ubuntu_password = "provide password for ubuntu machine" \ No newline at end of file diff --git a/third_party/Dell/ubuntu-22.04/iac/variables.tf b/third_party/Dell/ubuntu-22.04/iac/variables.tf new file mode 100644 index 00000000..79f5d44e --- /dev/null +++ b/third_party/Dell/ubuntu-22.04/iac/variables.tf @@ -0,0 +1,80 @@ +variable "ubuntu_username" { + description = "Username for the default Ubuntu user" + type = string + default = "ubuntu" +} + +variable "ubuntu_password" { + description = "Password for the default 
Ubuntu user" + type = string + sensitive = true + default = "ChangeMe123!" +} + +variable "ubuntu_hostname" { + description = "Hostname for the Ubuntu system" + type = string + default = "ubuntu-server" +} + +variable "ssh_keys" { + description = "SSH public keys to add to the default user (optional, one per line)" + type = list(string) + default = [] +} + +variable "use_dhcp" { + description = "Use DHCP for network configuration (true) or static IP (false)" + type = bool + default = true +} + +variable "static_ip" { + description = "Static IP address (required if use_dhcp = false)" + type = string + default = "" +} + +variable "static_netmask" { + description = "Subnet mask in CIDR notation (e.g., 24 for 255.255.255.0)" + type = string + default = "24" +} + +variable "static_gateway" { + description = "Default gateway (required if use_dhcp = false)" + type = string + default = "" +} + +variable "static_dns" { + description = "DNS servers (list)" + type = list(string) + default = ["8.8.8.8", "8.8.4.4"] +} + +variable "idrac_endpoint" { + description = "iDRAC Redfish endpoint URL (e.g., https://100.67.153.16). Can also be set via TF_VAR_idrac_endpoint environment variable." + type = string + default = "https://100.67.153.16" +} + +variable "idrac_user" { + description = "iDRAC username. Can also be set via TF_VAR_idrac_user environment variable." + type = string + sensitive = true + default = "root" +} + +variable "idrac_password" { + description = "iDRAC password. Can also be set via TF_VAR_idrac_password environment variable." 
+ type = string + sensitive = true + default = "calvin" +} + +variable "idrac_ssl_insecure" { + description = "Skip SSL certificate verification for iDRAC (use for self-signed certificates)" + type = bool + default = true +} diff --git a/third_party/Dell/ubuntu-22.04/iac/verify-installation.sh b/third_party/Dell/ubuntu-22.04/iac/verify-installation.sh new file mode 100644 index 00000000..42f744c2 --- /dev/null +++ b/third_party/Dell/ubuntu-22.04/iac/verify-installation.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Script to verify Ubuntu installation completion +# +# Environment variables: +# IDRAC_IP or IDRAC_HOST - iDRAC IP address or hostname (required) +# IDRAC_USER or IDRAC_USERNAME - iDRAC username (required) +# IDRAC_PASS or IDRAC_PASSWORD - iDRAC password (required) + +set -e + +# Read from environment variables with fallback options +IDRAC_IP="${IDRAC_IP:-${IDRAC_HOST}}" +IDRAC_USER="${IDRAC_USER:-${IDRAC_USERNAME}}" +IDRAC_PASS="${IDRAC_PASS:-${IDRAC_PASSWORD}}" + +# Validate required environment variables +if [ -z "$IDRAC_IP" ]; then + echo "❌ Error: IDRAC_IP or IDRAC_HOST environment variable is required" + exit 1 +fi + +if [ -z "$IDRAC_USER" ]; then + echo "❌ Error: IDRAC_USER or IDRAC_USERNAME environment variable is required" + exit 1 +fi + +if [ -z "$IDRAC_PASS" ]; then + echo "❌ Error: IDRAC_PASS or IDRAC_PASSWORD environment variable is required" + exit 1 +fi + +EXPECTED_HOSTNAME="ubuntu-server" # From terraform.tfvars + +echo "==========================================" +echo "Ubuntu Installation Verification" +echo "==========================================" +echo "" + +# Check hostname change +echo "1. Checking hostname (should change from MINWINPC)..." 
+HOSTNAME=$(curl -sk -u "${IDRAC_USER}:${IDRAC_PASS}" \ + "https://${IDRAC_IP}/redfish/v1/Systems/System.Embedded.1" \ + 2>/dev/null | python3 -c "import sys, json; print(json.load(sys.stdin).get('HostName', 'N/A'))" 2>/dev/null || echo "N/A") + +if [ "$HOSTNAME" != "MINWINPC" ] && [ "$HOSTNAME" != "N/A" ]; then + echo " ✅ Hostname changed to: $HOSTNAME" + echo " → Ubuntu installation appears successful!" +else + echo " ⚠️ Hostname still: $HOSTNAME (may still be installing)" +fi + +echo "" + +# Check boot override status (should revert after installation) +echo "2. Checking boot configuration..." +BOOT_OVERRIDE=$(curl -sk -u "${IDRAC_USER}:${IDRAC_PASS}" \ + "https://${IDRAC_IP}/redfish/v1/Systems/System.Embedded.1/Boot" \ + 2>/dev/null | python3 -c "import sys, json; data=json.load(sys.stdin); print(data.get('BootSourceOverrideEnabled', 'N/A'))" 2>/dev/null || echo "N/A") + +if [ "$BOOT_OVERRIDE" = "Disabled" ]; then + echo " ✅ Boot override disabled (installation completed, system booted from installed OS)" +elif [ "$BOOT_OVERRIDE" = "Once" ]; then + echo " ⚠️ Boot override still active (installation may still be in progress)" +else + echo " ℹ️ Boot override status: $BOOT_OVERRIDE" +fi + +echo "" + +# Check power state +echo "3. Checking system power state..." +POWER_STATE=$(curl -sk -u "${IDRAC_USER}:${IDRAC_PASS}" \ + "https://${IDRAC_IP}/redfish/v1/Systems/System.Embedded.1" \ + 2>/dev/null | python3 -c "import sys, json; print(json.load(sys.stdin).get('PowerState', 'N/A'))" 2>/dev/null || echo "N/A") + +echo " Power State: $POWER_STATE" +if [ "$POWER_STATE" = "On" ]; then + echo " ✅ System is powered on" +else + echo " ⚠️ System power state: $POWER_STATE" +fi + +echo "" + +# Try to detect Ubuntu via network (if SSH is enabled) +echo "4. Network connectivity check..." +echo " Note: Try to SSH to the server if you know the IP:" +echo " ssh user@" +echo " Password: Linux123!" 
+ +echo "" +echo "==========================================" +echo "Recommended: Use iDRAC Virtual Console" +echo "==========================================" +echo "1. Access: https://${IDRAC_IP}" +echo "2. Login with: ${IDRAC_USER} / ${IDRAC_PASS}" +echo "3. Open Virtual Console" +echo "4. You should see Ubuntu Desktop login screen" +echo "" From 6210fe4671eb49d56b8044d9e0e6a10f72e48eb5 Mon Sep 17 00:00:00 2001 From: Harika Date: Tue, 3 Feb 2026 16:26:07 -0600 Subject: [PATCH 02/35] Update Habana Container Runtime version requirement Signed-off-by: Harika --- .../Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md index 98c4637f..53be13ba 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md @@ -47,7 +47,7 @@ Before starting the deployment, ensure your system meets the following requireme | - **SPI / Preboot Firmware (Gaudi3**) | ≥1.22.0-fw-61.3.2-sec-3 | | - **Driver Version** | ≥1.21.3-f063886 | | - **NIC Driver Version** | ≥1.21.3-94c920f | -| - **Habana Container Runtime** | ≥ 1.22.1-6 | +| - **Habana Container Runtime** | ≥ 1.21.3 | #### Sudo Setup From 3dac4252ca9a871b32dfdee3fb8bfefba61fc682 Mon Sep 17 00:00:00 2001 From: Harika Date: Tue, 3 Feb 2026 16:26:26 -0600 Subject: [PATCH 03/35] Update Habana Container Runtime version in user guide Signed-off-by: Harika --- .../Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md index d096ffda..d1847643 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md +++ 
b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md @@ -46,7 +46,7 @@ Before starting the deployment, ensure your system meets the following requireme | **SPI / Preboot Firmware (Gaudi3**) | ≥1.22.0-fw-61.3.2-sec-3 | | **Driver Version** | ≥1.21.3-f063886 | | **NIC Driver Version** | ≥1.21.3-94c920f | -| **Habana Container Runtime** | ≥ 1.22.1-6 | +| **Habana Container Runtime** | ≥ 1.21.3 | | **Enterprise Inference Version** | release-1.4.0 | #### Sudo Setup From d4172016736910768d4bd1e4437e18dfb6665fb3 Mon Sep 17 00:00:00 2001 From: Harika Date: Tue, 3 Feb 2026 16:31:36 -0600 Subject: [PATCH 04/35] Apply suggestion from @alexsin368 Co-authored-by: alexsin368 <109180236+alexsin368@users.noreply.github.com> Signed-off-by: Harika --- .../Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md index d1847643..0aa3369f 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md @@ -33,7 +33,7 @@ It is designed for new users who may not be familiar with server configuration o ## Prerequisites Before starting the deployment, ensure your system meets the following requirements. -### 1.System Requirements +### 1. System Requirements | Requirement | Description | |--------------|-------------| From 1adc8a2f8561f6f164a093d231a5942c1e855c26 Mon Sep 17 00:00:00 2001 From: Harika Date: Tue, 3 Feb 2026 19:08:05 -0600 Subject: [PATCH 05/35] Revise user guide for Intel AI setup instructions Updated user guide for Intel AI for Enterprise Inference to clarify setup instructions and improve readability. 
Signed-off-by: Harika --- .../EI/single-node/user-guide-genai.md | 35 ++++++++++++------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md index 0aa3369f..33fee8b6 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md @@ -18,7 +18,7 @@ --- ## Overview -This guide walks you through the setup and deployment of **Intel® AI for Enterprise Inference** in **single-node** environment. +This guide walks you through the setup and deployment of **Intel® AI for Enterprise Inference** in a **single-node** environment. It is designed for new users who may not be familiar with server configuration or AI inference deployment. **You’ll Learn How To:** @@ -47,7 +47,7 @@ Before starting the deployment, ensure your system meets the following requireme | **Driver Version** | ≥1.21.3-f063886 | | **NIC Driver Version** | ≥1.21.3-94c920f | | **Habana Container Runtime** | ≥ 1.21.3 | -| **Enterprise Inference Version** | release-1.4.0 | +| **Enterprise Inference Version** | release-1.4.0 or newer | #### Sudo Setup @@ -96,12 +96,17 @@ SSH keys are required to allow **Ansible** or automation scripts to connect secu mkdir -p ~/certs && cd ~/certs openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -days 365 -nodes -subj "/CN=api.example.com" ``` - This will generate: + `cert.pem` → certificate `key.pem` → private key -2. **Map your DNS to your local IP (only if not registered in DNS):** + > **Note:** + > `api.example.com` is used throughout this guide as a sample. + > Replace it with **your own fully qualified domain name (FQDN)** wherever it appears. + + +3. 
**Map your DNS to your local IP (only if not registered in DNS):** If your domain is not registered in DNS, you can map it manually by editing your /etc/hosts file ```bash @@ -116,7 +121,7 @@ SSH keys are required to allow **Ansible** or automation scripts to connect secu Save and exit with CTRL+X → Y → Enter. - **Important:** This manual mapping is only required if your machine’s hostname is not resolvable via DNS. + > **Note:** Replace api.example.com with the URL used to generate certs in above step , and this manual mapping is only required if your machine’s hostname is not resolvable via DNS. If your domain is already managed by a DNS provider, skip this step. ### 4. Hugging Face Token Setup @@ -135,8 +140,9 @@ This section explains how to deploy Intel® AI for Enterprise Inference on a sin ```bash git clone https://github.com/opea-project/Enterprise-Inference.git cd Enterprise-Inference -git checkout release-1.4.0 +git checkout ${RELEASE} ``` +> **Note:** Update the RELEASE environment variable to point to the desired Enterprise Inference version(for example: release-1.4.0) ### 2. Configure the Setup Files and Environment @@ -152,7 +158,7 @@ vi core/inventory/inference-config.cfg ``` ``` -cluster_url=api.example.com +cluster_url=api.example.com # <-- Replace with your own FQDN cert_file=~/certs/cert.pem key_file=~/certs/key.pem keycloak_client_id=api @@ -161,7 +167,7 @@ keycloak_admin_password=changeme!! hugging_face_token=your_hugging_face_token hugging_face_token_falcon3=your_hugging_face_token models= -cpu_or_gpu=gaudi3 +cpu_or_gpu=gaudi3 vault_pass_code=place-holder-123 deploy_kubernetes_fresh=on deploy_ingress_controller=on @@ -173,6 +179,8 @@ deploy_ceph=off deploy_istio=off uninstall_ceph=off ``` +> **Note:** Replace cluster_url must match the DNS name. + **Update hosts.yaml File** Copy the single node preset hosts config file to the working directory: @@ -184,7 +192,11 @@ cp -f docs/examples/single-node/hosts.yaml core/inventory/hosts.yaml ### 3. 
Run the Deployment -Run the setup for Gaudi _(the "models" and "cpu-or-gpu" parameters are only needed if they are not set in inference-config.cfg)_: +> **Note:** +> The `--models` argument selects a model using its **numeric ID** +> If `--models` is omitted, the installer displays the full model list and prompts you to select a model interactively. + +**Run the setup for Gaudi:** ```bash cd core @@ -192,8 +204,7 @@ chmod +x inference-stack-deploy.sh ./inference-stack-deploy.sh --models "1" --cpu-or-gpu "gaudi3" ``` -Run the setup for CPU _(the "models" and "cpu-or-gpu" parameters are only needed if they are not set in inference-config.cfg)_: - +**Run the setup for CPU:** ```bash cd core chmod +x inference-stack-deploy.sh @@ -207,7 +218,7 @@ If using Intel® Gaudi® hardware, make sure firmware and drivers are updated be ### 4. Verify the Deployment Verify Pods Status ```bash -kubectl get pods +kubectl get pods -A ``` Expected States: - All pods Running From 1d50ef618adbf1cac0b60542df8b6db939a36770 Mon Sep 17 00:00:00 2001 From: Harika Date: Wed, 4 Feb 2026 09:41:00 -0600 Subject: [PATCH 06/35] update readme & apisix user-gude Signed-off-by: Harika --- .../EI/single-node/user-guide-apisix.md | 29 +++++++++++++------ .../EI/single-node/user-guide-genai.md | 4 +-- third_party/Dell/ubuntu-22.04/iac/README.md | 8 ++--- .../Dell/ubuntu-22.04/iac/variables.tf | 1 - 4 files changed, 25 insertions(+), 17 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md index 53be13ba..d13b84df 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md @@ -19,7 +19,7 @@ --- ## Overview -This guide walks you through the setup and deployment of **Intel® AI for Enterprise Inference** in **single-node** environment. 
+This guide walks you through the setup and deployment of **Intel® AI for Enterprise Inference** in a **single-node** environment. It is designed for new users who may not be familiar with server configuration or AI inference deployment. **You’ll Learn How To:** @@ -101,6 +101,10 @@ SSH keys are required to allow **Ansible** or automation scripts to connect secu `cert.pem` → certificate `key.pem` → private key + > **Note:** + > `api.example.com` is used throughout this guide as a sample. + > Replace it with **your own fully qualified domain name (FQDN)** wherever it appears. + 2. **Map your DNS to your local IP (only if not registered in DNS):** If your domain is not registered in DNS, you can map it manually by editing your /etc/hosts file @@ -116,8 +120,8 @@ SSH keys are required to allow **Ansible** or automation scripts to connect secu Save and exit with CTRL+X → Y → Enter. - **Important:** This manual mapping is only required if your machine’s hostname is not resolvable via DNS. - If your domain is already managed by a DNS provider, skip this step. + > **Note:** Replace api.example.com with the URL used to generate certs in above step , and this manual mapping is only required if your machine’s hostname is not resolvable via DNS. + > If your domain is already managed by a DNS provider, skip this step. ### 4. Hugging Face Token Setup 1. Visit huggingface.com and log in (or create an account). @@ -142,8 +146,9 @@ This section explains how to deploy Intel® AI for Enterprise Inference on a sin cd ~ git clone https://github.com/opea-project/Enterprise-Inference.git cd Enterprise-Inference -git checkout release-1.4.0 +git checkout ${RELEASE} ``` +> **Note:** Update the RELEASE environment variable to point to the desired Enterprise Inference version(for example: release-1.4.0) --- @@ -164,7 +169,7 @@ vi core/inventory/inference-config.cfg Sample default values (insert your token) for a full deployment of the inference stack with Llama-8B model. 
``` -cluster_url=api.example.com +cluster_url=api.example.com # <-- Replace with your own FQDN cert_file=~/certs/cert.pem key_file=~/certs/key.pem keycloak_client_id=api @@ -172,7 +177,7 @@ keycloak_admin_user=api-admin keycloak_admin_password=changeme!! hugging_face_token=your_hugging_face_token hugging_face_token_falcon3=your_hugging_face_token -models=1 +models= cpu_or_gpu=gaudi3 vault_pass_code=place-holder-123 deploy_kubernetes_fresh=on @@ -185,6 +190,7 @@ deploy_ceph=off deploy_istio=off uninstall_ceph=off ``` +> **Note:** Replace cluster_url with your DNS , it must match with DNS used in certs generation. To support non-interactive execution of inference-stack-deploy.sh, create a file named "core/inentory/.become-passfile" with your user's sudo password: @@ -193,6 +199,7 @@ vi core/inentory/.become-passfile chmod 600 core/inentory/.become-passfile ``` **Update hosts.yaml File** + Copy the single node preset hosts config file to the working directory: ```bash cp -f docs/examples/single-node/hosts.yaml core/inventory/hosts.yaml @@ -202,7 +209,11 @@ cp -f docs/examples/single-node/hosts.yaml core/inventory/hosts.yaml ### 3. Run the Deployment -Run the setup for Gaudi _(the "models" and "cpu-or-gpu" parameters are only needed if they are not set in inference-config.cfg)_: +> **Note:** +> The `--models` argument selects a model using its **numeric ID** +> If `--models` is omitted, the installer displays the full model list and prompts you to select a model interactively. 
+ +Run the setup for Gaudi ```bash cd core @@ -210,7 +221,7 @@ chmod +x inference-stack-deploy.sh ./inference-stack-deploy.sh --models "1" --cpu-or-gpu "gaudi3" ``` -Run the setup for CPU _(the "models" and "cpu-or-gpu" parameters are only needed if they are not set in inference-config.cfg)_: +Run the setup for CPU ```bash cd core @@ -225,7 +236,7 @@ If using Intel® Gaudi® hardware, make sure firmware and drivers are updated be Verify Pods Status ```bash -kubectl get pods +kubectl get pods -A ``` Expected States: - All pods Running diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md index 33fee8b6..3fe7beac 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md @@ -122,7 +122,7 @@ SSH keys are required to allow **Ansible** or automation scripts to connect secu Save and exit with CTRL+X → Y → Enter. > **Note:** Replace api.example.com with the URL used to generate certs in above step , and this manual mapping is only required if your machine’s hostname is not resolvable via DNS. - If your domain is already managed by a DNS provider, skip this step. + > If your domain is already managed by a DNS provider, skip this step. ### 4. Hugging Face Token Setup 1. Visit huggingface.com and log in (or create an account). @@ -179,7 +179,7 @@ deploy_ceph=off deploy_istio=off uninstall_ceph=off ``` -> **Note:** Replace cluster_url must match the DNS name. +> **Note:** Replace cluster_url with your DNS , it must match with DNS used in certs generation. 
**Update hosts.yaml File** diff --git a/third_party/Dell/ubuntu-22.04/iac/README.md b/third_party/Dell/ubuntu-22.04/iac/README.md index 032d50c5..2712cba9 100644 --- a/third_party/Dell/ubuntu-22.04/iac/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/README.md @@ -55,10 +55,8 @@ idrac_endpoint = "https://100.67.x.x" idrac_user = "root" idrac_password = "calvin" idrac_ssl_insecure = true - -ubuntu_username = "user" -ubuntu_hostname = "inference-node" -use_dhcp = true +ubuntu_username = "user" +ubuntu_password = "password" ``` **Apply Terraform** @@ -105,7 +103,7 @@ sudo ./deploy-enterprise-inference.sh \ | Option | Description | | -------| ------------ | | -u | OS username | -| -p | OS userpassword | +| -p | OS userpassword | | -t | Hugging Face token | | -g | gaudi3 or cpu | | -m | Model IDs | diff --git a/third_party/Dell/ubuntu-22.04/iac/variables.tf b/third_party/Dell/ubuntu-22.04/iac/variables.tf index 79f5d44e..d323abf1 100644 --- a/third_party/Dell/ubuntu-22.04/iac/variables.tf +++ b/third_party/Dell/ubuntu-22.04/iac/variables.tf @@ -56,7 +56,6 @@ variable "static_dns" { variable "idrac_endpoint" { description = "iDRAC Redfish endpoint URL (e.g., https://100.67.153.16). Can also be set via TF_VAR_idrac_endpoint environment variable." 
type = string - default = "https://100.67.153.16" } variable "idrac_user" { From 4c4a07ef9a18c10bfda155a10e2219c4aa3a29ae Mon Sep 17 00:00:00 2001 From: Harika Date: Wed, 4 Feb 2026 18:19:51 -0600 Subject: [PATCH 07/35] update deploy-enterpise-inference script Signed-off-by: Harika --- third_party/Dell/ubuntu-22.04/iac/README.md | 18 +- .../iac/deploy-enterprise-inference.sh | 357 +++++++++++++----- 2 files changed, 285 insertions(+), 90 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/iac/README.md b/third_party/Dell/ubuntu-22.04/iac/README.md index 2712cba9..7bbe5840 100644 --- a/third_party/Dell/ubuntu-22.04/iac/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/README.md @@ -109,12 +109,28 @@ sudo ./deploy-enterprise-inference.sh \ | -m | Model IDs | | -b | Repo branch (default: release-1.4.0) | | -r | Resume from last checkpoint | +| -d | keycloak or genai, by default set to keycloak | +| -o | off or on, by default observability set to off | **Resume After Failure** The deployment script is resume-safe. If a failure occurs, simply rerun the script with the -r flag: ```bash -sudo ./deploy-enterprise-inference.sh -r +sudo ./deploy-enterprise-inference.sh \ +-u user \ +-p Linux123! 
\ +-t hf_XXXXXXXXXXXX \ +-g gaudi3 \ +-m "1" \ +-r +``` + +**To uninstall this deployment** + +Below command will delete pods, uninstalls Enterprise Inference stack and state file + +```bash +sudo ./deploy-enterprise-inference.sh -u user uninstall ``` **State is tracked in:** diff --git a/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh b/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh index 43d1668b..51471097 100644 --- a/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh +++ b/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh @@ -6,6 +6,7 @@ # # Usage: # ./deploy-enterprise-inference.sh -u -t [OPTIONS] +# ./deploy-enterprise-inference.sh uninstall -u [OPTIONS] # # Options: # -u, --username Enterprise Inference owner username (required) @@ -15,8 +16,8 @@ # -m, --models Model IDs to deploy, comma-separated (default: "5") # -b, --branch Git branch to clone (default: dell-deploy) # -f, --firmware-version Firmware version (default: 1.22.1) -# -d, --deployment-mode Deployment mode: 'keycloak' or 'genai' (default: keycloak) -# -o, --observability Enable observability: 'on' or 'off' (default: off) +# -d, --deployment-mode Deployment mode: 'keycloak' or 'genai' (default: keycloak) +# -o, --observability Enable observability: 'on' or 'off' (default: off) # -r, --resume Resume from checkpoint (auto-detected if state file exists) # -s, --state-file State file path (default: /tmp/ei-deploy.state) # -h, --help Show this help message @@ -24,10 +25,11 @@ # Example: # ./deploy-enterprise-inference.sh -u user -t hf_xxxxxxxxxxxxx -g gaudi3 -m "5" # ./deploy-enterprise-inference.sh -u user -t hf_xxxxxxxxxxxxx -g cpu -m "1" -d genai -o on +# ./deploy-enterprise-inference.sh uninstall -u user + set -euo pipefail # Default values - USERNAME="Replace-with-your-username" HF_TOKEN="Replace-with-your-hugging face token" USER_PASSWORD="Replace-with-your-user-password" @@ -35,14 +37,39 @@ GPU_TYPE="Enter gaudi3/cpu based on your 
deployment" MODELS="Enter Model number" DEPLOYMENT_MODE="keycloak" DEPLOY_OBSERVABILITY="off" -BRANCH="release-1.4.0" -REPO_URL="https://github.com/opea-project/Enterprise-Inference" KEYCLOAK_CLIENT_ID="api" KEYCLOAK_ADMIN_USER="api-admin" KEYCLOAK_ADMIN_PASSWORD="changeme!!" FIRMWARE_VERSION="1.22.1" STATE_FILE="/tmp/ei-deploy.state" +BRANCH="release-1.4.0" +REPO_URL="https://github.com/opea-project/Enterprise-Inference" RESUME=false +ACTION="deploy" + +# Model ID mapping (numeric selector -> Hugging Face model id) +declare -A MODEL_MAP=( + ["1"]="meta-llama/Llama-3.1-8B-Instruct" + ["2"]="meta-llama/Llama-3.1-70B-Instruct" + ["3"]="meta-llama/Llama-3.1-405B-Instruct" + ["4"]="meta-llama/Llama-3.3-70B-Instruct" + ["5"]="meta-llama/Llama-4-Scout-17B-16E-Instruct" + ["6"]="Qwen/Qwen2.5-32B-Instruct" + ["7"]="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" + ["8"]="deepseek-ai/DeepSeek-R1-Distill-Llama-8B" + ["9"]="mistralai/Mixtral-8x7B-Instruct-v0.1" + ["10"]="mistralai/Mistral-7B-Instruct-v0.3" + ["11"]="BAAI/bge-base-en-v1.5" + ["12"]="BAAI/bge-reranker-base" + ["13"]="codellama/CodeLlama-34b-Instruct-hf" + ["14"]="tiiuae/Falcon3-7B-Instruct" + ["21"]="meta-llama/Llama-3.1-8B-Instruct" + ["22"]="meta-llama/Llama-3.2-3B-Instruct" + ["23"]="deepseek-ai/DeepSeek-R1-Distill-Llama-8B" + ["24"]="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" + ["25"]="Qwen/Qwen3-1.7B" + ["26"]="Qwen/Qwen3-4B-Instruct-2507" +) # Colors for output RED='\033[0;31m' @@ -68,30 +95,122 @@ log_error() { echo -e "${RED}[ERROR]${NC} $1" >&2 } +# Hugging Face token checks for selected model numbers +check_hf_token_access() { + log_info "Validating Hugging Face token..." 
+ local response http_code body + response=$(curl -sS -w $'\n%{http_code}' \ + -H "Authorization: Bearer ${HF_TOKEN}" \ + "https://huggingface.co/api/whoami-v2") + http_code="${response##*$'\n'}" + body="${response%$'\n'*}" + if [[ "$http_code" != "200" ]]; then + log_error "Hugging Face token validation failed (HTTP ${http_code})" + echo "$body" + exit 1 + fi + log_success "Hugging Face token is valid" + + if [[ -z "${MODELS:-}" ]]; then + log_warn "No model numbers provided; skipping model access checks" + return 0 + fi + + IFS=',' read -r -a model_numbers <<< "${MODELS}" + local model_ids=() + for num in "${model_numbers[@]}"; do + num="$(echo "$num" | xargs)" + if [[ -z "$num" ]]; then + continue + fi + if [[ -z "${MODEL_MAP[$num]:-}" ]]; then + log_warn "Unknown model number '${num}' (no mapping found)" + continue + fi + model_ids+=("${MODEL_MAP[$num]}") + done + + if [[ ${#model_ids[@]} -eq 0 ]]; then + log_warn "No valid model numbers found; skipping model access checks" + return 0 + fi + + log_info "Checking model access for selected numbers..." 
+ local seen_ids=() + for model_id in "${model_ids[@]}"; do + if [[ " ${seen_ids[*]} " == *" ${model_id} "* ]]; then + continue + fi + seen_ids+=("${model_id}") + log_info "Model: ${model_id}" + local model_code + model_code=$(curl -sS -o /dev/null -w "%{http_code}" \ + -H "Authorization: Bearer ${HF_TOKEN}" \ + "https://huggingface.co/api/models/${model_id}") + if [[ "$model_code" == "200" ]]; then + log_success "Access confirmed for ${model_id}" + continue + fi + if [[ "$model_code" == "401" || "$model_code" == "403" ]]; then + log_error "Model is gated or token lacks access: ${model_id} (HTTP ${model_code})" + exit 1 + fi + log_error "Unable to access model '${model_id}' (HTTP ${model_code})" + exit 1 + done +} + +update_inference_config() { + if [[ -f "$CONFIG_FILE" ]]; then + sed -i -E \ + -e 's/^[[:space:]]*hugging_face_token[[:space:]]*=.*/hugging_face_token='${HF_TOKEN}'/' \ + -e 's/^[[:space:]]*models[[:space:]]*=.*/models='${MODELS}'/' \ + -e 's/^[[:space:]]*cpu_or_gpu[[:space:]]*=.*/cpu_or_gpu='${GPU_TYPE}'/' \ + -e 's/^[[:space:]]*keycloak_client_id[[:space:]]*=.*/keycloak_client_id='${KEYCLOAK_CLIENT_ID}'/' \ + -e 's/^[[:space:]]*keycloak_admin_user[[:space:]]*=.*/keycloak_admin_user='${KEYCLOAK_ADMIN_USER}'/' \ + -e 's/^[[:space:]]*keycloak_admin_password[[:space:]]*=.*/keycloak_admin_password='${KEYCLOAK_ADMIN_PASSWORD}'/' \ + -e 's/^[[:space:]]*deploy_keycloak_apisix[[:space:]]*=.*/deploy_keycloak_apisix='${DEPLOY_KEYCLOAK_APISIX}'/' \ + -e 's/^[[:space:]]*deploy_genai_gateway[[:space:]]*=.*/deploy_genai_gateway='${DEPLOY_GENAI_GATEWAY}'/' \ + -e 's/^[[:space:]]*deploy_observability[[:space:]]*=.*/deploy_observability='${DEPLOY_OBSERVABILITY}'/' \ + "$CONFIG_FILE" + log_info "Updated inference-config.cfg with models='${MODELS}' and cpu_or_gpu=${GPU_TYPE}" + else + log_warn "inference-config.cfg not found at $CONFIG_FILE, skipping update." 
+ fi +} + # Usage function usage() { cat << EOF Usage: $0 -u -t [OPTIONS] + $0 uninstall -u [OPTIONS] -Required Options: +Required Options (deploy): -u, --username Enterprise Inference owner username -t, --token Hugging Face token +Required Options (uninstall): + -u, --username Enterprise Inference owner username + Optional Options: -p, --password User sudo password for Ansible (default: Linux123!) -g, --gpu-type GPU type: 'gaudi3' or 'cpu' (default: gaudi3) -m, --models Model IDs to deploy, comma-separated (default: "1") -b, --branch Git branch to clone (default: dell-deploy) -f, --firmware-version Firmware version (default: 1.22.1) - -d, --deployment-mode Deployment mode: 'keycloak' or 'genai' (default: keycloak) - -o, --observability Enable observability: 'on' or 'off' (default: off) + -d, --deployment-mode Deployment mode: 'keycloak' or 'genai' (default: keycloak) + -o, --observability Enable observability: 'on' or 'off' (default: off) -s, --state-file State file path (default: /tmp/ei-deploy.state) -r, --resume Force resume from checkpoint -h, --help Show this help message +Notes: + Model numbers map to Hugging Face model IDs defined in MODEL_MAP. 
+ Example: $0 -u user -t hf_xxxxxxxxxxxxx -g gaudi3 -m "1" $0 -u user -t hf_xxxxxxxxxxxxx -g cpu -m "1" -d genai -o on + $0 uninstall -u user EOF exit 1 } @@ -99,6 +218,10 @@ EOF # Parse command line arguments while [[ $# -gt 0 ]]; do case $1 in + uninstall) + ACTION="uninstall" + shift + ;; -u|--username) USERNAME="$2" shift 2 @@ -154,15 +277,21 @@ while [[ $# -gt 0 ]]; do done # Validate required parameters -if [[ -z "$USERNAME" ]] || [[ -z "$HF_TOKEN" ]]; then - log_error "Username and Hugging Face token are required" +if [[ -z "$USERNAME" ]]; then + log_error "Username is required" + usage +fi +if [[ "$ACTION" == "deploy" ]] && [[ -z "$HF_TOKEN" ]]; then + log_error "Hugging Face token is required for deployment" usage fi -# Validate GPU type -if [[ "$GPU_TYPE" != "gaudi3" ]] && [[ "$GPU_TYPE" != "cpu" ]]; then - log_error "GPU type must be 'gaudi3' or 'cpu'" - exit 1 +# Validate GPU type (deploy only) +if [[ "$ACTION" == "deploy" ]]; then + if [[ "$GPU_TYPE" != "gaudi3" ]] && [[ "$GPU_TYPE" != "cpu" ]]; then + log_error "GPU type must be 'gaudi3' or 'cpu'" + exit 1 + fi fi # Validate deployment mode @@ -176,7 +305,6 @@ if [[ "$DEPLOY_OBSERVABILITY" != "on" ]] && [[ "$DEPLOY_OBSERVABILITY" != "off" log_error "Observability must be 'on' or 'off'" exit 1 fi - # Set deployment variables based on deployment mode set_deployment_variables() { case "$DEPLOYMENT_MODE" in @@ -196,7 +324,7 @@ set_deployment_variables # Check if running with root/sudo privileges if [[ $EUID -ne 0 ]]; then - log_error "This script must be run with sudo privileges" + log_error "This script must be run with sudo/root privileges" log_error "Please run: sudo $0 $*" exit 1 fi @@ -229,7 +357,6 @@ if [[ -f "$STATE_FILE" ]] || [[ "$RESUME" == true ]]; then log_info "State file found. Resuming from checkpoint..." source "$STATE_FILE" RESUME=true - # Re-apply deployment variables based on saved deployment mode set_deployment_variables else log_warn "Resume requested but no state file found. 
Starting fresh." @@ -239,6 +366,13 @@ else RESUME=false fi +FORCE_INTERACTIVE_DEPLOY=false +if [[ "$RESUME" == true ]] && [[ "${LAST_COMPLETED_STEP:-}" == "deploy_stack" ]]; then + log_info "Previous deployment detected in state file; skipping setup steps" + LAST_COMPLETED_STEP="certificates" + FORCE_INTERACTIVE_DEPLOY=true +fi + # State management functions save_state() { local step=$1 @@ -300,21 +434,55 @@ skip_if_completed() { return 1 } -log_warn "Have you verified that your Hugging Face token has access to the model(s): ${MODELS} ?" -if [[ -t 0 ]]; then - read -r -p "Type 'yes' to continue or anything else to exit: " HF_CONFIRM -else - log_warn "Non-interactive shell detected, auto-continuing Hugging Face check" - HF_CONFIRM="yes" -fi - -if [[ "${HF_CONFIRM}" != "yes" ]]; then - log_error "Please verify Hugging Face model access before running the deployment." - exit 1 -fi - # Main deployment steps main() { + if [[ "$ACTION" == "uninstall" ]]; then + log_info "==========================================" + log_info "Enterprise Inference Stack Uninstall" + log_info "==========================================" + log_info "Username: $USERNAME" + log_info "Branch: $BRANCH" + log_info "State File: $STATE_FILE" + log_info "==========================================" + echo "" + + if [[ -f "$STATE_FILE" ]]; then + rm -f "$STATE_FILE" + log_info "State file removed" + fi + + if [[ ! -f "/home/${USERNAME}/Enterprise-Inference/core/inference-stack-deploy.sh" ]]; then + log_error "inference-stack-deploy.sh not found at /home/${USERNAME}/Enterprise-Inference/core" + exit 1 + fi + + log_info "Running inference-stack-deploy.sh decommission..." + UNINSTALL_OUTPUT=$(su "${USERNAME}" -c "cd /home/${USERNAME}/Enterprise-Inference/core && echo -e '2\nyes\nlatest\nyes' | bash ./inference-stack-deploy.sh" 2>&1) || { + log_error "Enterprise Inference Stack uninstall failed!" 
+ echo "$UNINSTALL_OUTPUT" + exit 1 + } + + if echo "$UNINSTALL_OUTPUT" | grep -q "Reset operation cancelled"; then + log_error "Uninstall was cancelled by the prompt." + echo "$UNINSTALL_OUTPUT" + exit 1 + fi + + if [[ -d "/home/${USERNAME}/Enterprise-Inference" ]]; then + if [[ -f "/home/${USERNAME}/Enterprise-Inference/core/inference-stack-deploy.sh" ]]; then + rm -f "/home/${USERNAME}/Enterprise-Inference/core/inference-stack-deploy.sh" + log_info "Removed inference-stack-deploy.sh" + fi + log_info "Removing /home/${USERNAME}/Enterprise-Inference..." + rm -rf "/home/${USERNAME}/Enterprise-Inference" + log_success "Enterprise-Inference directory removed" + fi + + log_success "Uninstall completed successfully!" + exit 0 + fi + log_info "==========================================" log_info "Enterprise Inference Stack Deployment" log_info "==========================================" @@ -331,6 +499,8 @@ main() { log_info "==========================================" echo "" + check_hf_token_access + # Step 1: Install system packages if ! skip_if_completed "system_packages"; then log_info "Step 1: Installing system packages..." @@ -349,26 +519,17 @@ main() { log_warn "Enterprise-Inference directory already exists. Skipping clone..." else cd "/home/${USERNAME}" - su "${USERNAME}" -c "git clone ${REPO_URL}" - su "${USERNAME}" -c "cd /home/${USERNAME}/Enterprise-Inference && git checkout ${BRANCH}" + su "${USERNAME}" -c "git clone --depth 1 --branch ${BRANCH} ${REPO_URL}" log_success "Repository cloned" fi - log_info "Applying single-node inventory defaults..." 
- SRC_BASE="/home/${USERNAME}/Enterprise-Inference/docs/examples/single-node" - DEST_BASE="/home/${USERNAME}/Enterprise-Inference/core/inventory" - - if [[ -d "$SRC_BASE" ]] && [[ -d "$DEST_BASE" ]]; then - cp -f "${SRC_BASE}/hosts.yaml" "${DEST_BASE}/hosts.yaml" - chown "${USERNAME}:${USERNAME}" "${DEST_BASE}/hosts.yaml" - log_success "Single-node hosts.yaml applied" - else - log_warn "Single-node example for hosts not found, skipping copy" - fi - # Create .become-passfile for Ansible (empty since we configure NOPASSWD) log_info "Creating Ansible become-passfile..." INVENTORY_DIR="/home/${USERNAME}/Enterprise-Inference/core/inventory" + if [[ ! -d "$INVENTORY_DIR" ]]; then + # Try alternative spelling (typo in some versions) + INVENTORY_DIR="/home/${USERNAME}/Enterprise-Inference/core/inventory" + fi if [[ -d "$INVENTORY_DIR" ]]; then BECOME_PASSFILE="${INVENTORY_DIR}/.become-passfile" @@ -380,17 +541,6 @@ main() { else log_warn "Inventory directory not found at ${INVENTORY_DIR}, will create later" fi - - HOSTS_FILE="/home/${USERNAME}/Enterprise-Inference/core/inventory/hosts.yaml" - - if [[ -f "$HOSTS_FILE" ]]; then - log_info "Updating ansible_user in hosts.yaml to '${USERNAME}'" - - sed -i -E "/^[[:space:]]*master1:/,/^[[:space:]]{2}children:/ s/^([[:space:]]*ansible_user:[[:space:]]*).*/\1${USERNAME}/" "$HOSTS_FILE" - else - log_warn "hosts.yaml not found at ${HOSTS_FILE}, skipping ansible_user update" - fi - if [[ "$GPU_TYPE" == "cpu" ]] || [[ "$GPU_TYPE" == "gaudi3" ]]; then CONFIG_FILE="/home/${USERNAME}/Enterprise-Inference/core/inventory/inference-config.cfg" if [[ -f "$CONFIG_FILE" ]]; then @@ -405,29 +555,30 @@ main() { -e 's/^[[:space:]]*deploy_genai_gateway[[:space:]]*=.*/deploy_genai_gateway='${DEPLOY_GENAI_GATEWAY}'/' \ -e 's/^[[:space:]]*deploy_observability[[:space:]]*=.*/deploy_observability='${DEPLOY_OBSERVABILITY}'/' \ "$CONFIG_FILE" - log_info "Updated inference-config.cfg with models='${MODELS}' and cpu_or_gpu=cpu" + log_info "Updated 
inference-config.cfg with models='${MODELS}' and cpu_or_gpu=${GPU_TYPE}" else log_warn "inference-config.cfg not found at $CONFIG_FILE, skipping update." - fi + fi + fi + + # ------------------------------------------------------------ + # Disable NRI explicitly for CPU-only deployments + # ------------------------------------------------------------ + if [[ "$GPU_TYPE" == "cpu" ]]; then + log_info "CPU-only mode detected — disabling NRI and CPU balloons" + + # Update if keys exist + sed -i -E \ + -e 's/^[[:space:]]*enable_nri[[:space:]]*=.*/enable_nri=false/' \ + -e 's/^[[:space:]]*enable_cpu_balloons[[:space:]]*=.*/enable_cpu_balloons=false/' \ + "$CONFIG_FILE" || true + + # Append if keys do not exist + grep -q '^enable_nri=' "$CONFIG_FILE" || echo 'enable_nri=false' >> "$CONFIG_FILE" + grep -q '^enable_cpu_balloons=' "$CONFIG_FILE" || echo 'enable_cpu_balloons=false' >> "$CONFIG_FILE" + + log_success "NRI disabled for CPU-only deployment" fi - # ------------------------------------------------------------ - # Disable NRI explicitly for CPU-only deployments - # ------------------------------------------------------------ - if [[ "$GPU_TYPE" == "cpu" ]]; then - log_info "CPU-only mode detected — disabling NRI and CPU balloons" - - # Update if keys exist - sed -i -E \ - -e 's/^[[:space:]]*enable_nri[[:space:]]*=.*/enable_nri=false/' \ - -e 's/^[[:space:]]*enable_cpu_balloons[[:space:]]*=.*/enable_cpu_balloons=false/' \ - "$CONFIG_FILE" || true - - # Append if keys do not exist - grep -q '^enable_nri=' "$CONFIG_FILE" || echo 'enable_nri=false' >> "$CONFIG_FILE" - grep -q '^enable_cpu_balloons=' "$CONFIG_FILE" || echo 'enable_cpu_balloons=false' >> "$CONFIG_FILE" - - log_success "NRI disabled for CPU-only deployment" - fi save_state "clone_repo" fi @@ -567,6 +718,20 @@ main() { log_info "Sudo NOPASSWD already configured for ${USERNAME}" fi + + log_info "Applying single-node inventory defaults..." 
+ SRC_BASE="/home/${USERNAME}/Enterprise-Inference/docs/examples/single-node" + DEST_BASE="/home/${USERNAME}/Enterprise-Inference/core/inventory" + + if [[ -d "$SRC_BASE" ]] && [[ -d "$DEST_BASE" ]]; then + cp -f "${SRC_BASE}/hosts.yaml" "${DEST_BASE}/hosts.yaml" + chown "${USERNAME}:${USERNAME}" "${DEST_BASE}/hosts.yaml" + log_success "Single-node hosts.yaml applied" + else + log_warn "Single-node example for hosts not found, skipping copy" + fi + + # Ensure .become-passfile exists (created in Step 2, but verify here) log_info "Verifying Ansible become-passfile..." INVENTORY_DIR="/home/${USERNAME}/Enterprise-Inference/core/inventory" @@ -594,6 +759,16 @@ main() { log_warn "Inventory directory not found at ${INVENTORY_DIR}" fi + HOSTS_FILE="/home/${USERNAME}/Enterprise-Inference/core/inventory/hosts.yaml" + + if [[ -f "$HOSTS_FILE" ]]; then + log_info "Updating ansible_user in hosts.yaml to '${USERNAME}'" + + sed -i -E "/^[[:space:]]*master1:/,/^[[:space:]]{2}children:/ s/^([[:space:]]*ansible_user:[[:space:]]*).*/\1${USERNAME}/" "$HOSTS_FILE" + else + log_warn "hosts.yaml not found at ${HOSTS_FILE}, skipping ansible_user update" + fi + # Export Hugging Face token export HUGGINGFACE_TOKEN="${HF_TOKEN}" @@ -601,12 +776,23 @@ main() { log_info "Parameters: --models '${MODELS}' --cpu-or-gpu '${GPU_TYPE}' --hugging-face-token " # Run the deployment script - # Using echo to provide input: "1" for "Provision Enterprise Inference Cluster", "yes" for confirmation - su "${USERNAME}" -c "cd /home/${USERNAME}/Enterprise-Inference/core && echo -e '1\nyes' | bash ./inference-stack-deploy.sh --models '${MODELS}' --cpu-or-gpu '${GPU_TYPE}' --hugging-face-token ${HUGGINGFACE_TOKEN}" || { - log_error "Enterprise Inference Stack deployment failed!" 
- log_warn "You can resume by running this script again with -r flag" - exit 1 - } + if [[ "$FORCE_INTERACTIVE_DEPLOY" == true ]]; then + log_info "State file indicates a prior deployment; running interactively" + CONFIG_FILE="/home/${USERNAME}/Enterprise-Inference/core/inventory/inference-config.cfg" + update_inference_config + su "${USERNAME}" -c "cd /home/${USERNAME}/Enterprise-Inference/core && bash ./inference-stack-deploy.sh --cpu-or-gpu '${GPU_TYPE}' --hugging-face-token ${HUGGINGFACE_TOKEN}" || { + log_error "Enterprise Inference Stack deployment failed!" + log_warn "You can resume by running this script again with -r flag" + exit 1 + } + else + # Using echo to provide input: "1" for "Provision Enterprise Inference Cluster", "yes" for confirmation + su "${USERNAME}" -c "cd /home/${USERNAME}/Enterprise-Inference/core && echo -e '1\nyes' | bash ./inference-stack-deploy.sh --models '${MODELS}' --cpu-or-gpu '${GPU_TYPE}' --hugging-face-token ${HUGGINGFACE_TOKEN}" || { + log_error "Enterprise Inference Stack deployment failed!" + log_warn "You can resume by running this script again with -r flag" + exit 1 + } + fi log_success "Enterprise Inference Stack deployed successfully!" save_state "deploy_stack" @@ -621,13 +807,6 @@ main() { log_success "==========================================" log_success "Deployment completed successfully!" log_success "==========================================" - log_info "" - log_info "Next steps:" - log_info "1. If kernel configuration was changed, reboot the system" - log_info "2. Verify the deployment using the verification commands" - log_info "3. 
Test the inference endpoints" - log_info "" - log_info "Deployment Summary:" log_info " - Mode: ${DEPLOYMENT_MODE}" log_info " - GPU Type: ${GPU_TYPE}" log_info " - Models: ${MODELS}" @@ -635,4 +814,4 @@ main() { } # Run main function -main "$@" +main "$@" \ No newline at end of file From 3af744a9f5bbc8f43b591cf6a2f3242354e15d80 Mon Sep 17 00:00:00 2001 From: Harika Date: Wed, 4 Feb 2026 18:25:06 -0600 Subject: [PATCH 08/35] Enhance README with ISO mounting and Terraform details Updated README.md to include new instructions for mounting ISO and clarify mandatory Terraform variables. Signed-off-by: Harika --- third_party/Dell/ubuntu-22.04/iac/README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/iac/README.md b/third_party/Dell/ubuntu-22.04/iac/README.md index 7bbe5840..9265c98d 100644 --- a/third_party/Dell/ubuntu-22.04/iac/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/README.md @@ -29,6 +29,7 @@ export IDRAC_PASS=calvin ``` **Mount ISO** ```bash +chmod +x mount-iso.sh ./mount-iso.sh ``` --- @@ -47,7 +48,10 @@ Key Notes **Terraform Variables** -Update your terraform variables in 'terraform.tfvars' +The following variables must be explicitly provided in 'terraform.tfvars' for the Ubuntu installer boot workflow to function correctly. + +While additional variables exist with default values defined in variables.tf, these credentials and endpoints are mandatory and have no safe defaults. + Example (terraform.tfvars): ```bash @@ -179,7 +183,7 @@ free -h **2. Enterprise Inference Services** Verify all inference services are running. ```bash -kubectl get pods +kubectl get pods -A ``` Expected: - All services in RUNNING state From df43c14dcd22c59ab6c62767be660dff72d9eea5 Mon Sep 17 00:00:00 2001 From: Harika Date: Wed, 4 Feb 2026 18:28:34 -0600 Subject: [PATCH 09/35] Update README with script permission and usage instructions Added instructions for changing file permissions and running the script. 
Signed-off-by: Harika --- third_party/Dell/ubuntu-22.04/iac/README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/third_party/Dell/ubuntu-22.04/iac/README.md b/third_party/Dell/ubuntu-22.04/iac/README.md index 9265c98d..25d925f1 100644 --- a/third_party/Dell/ubuntu-22.04/iac/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/README.md @@ -94,7 +94,13 @@ This script performs **all post-OS configuration** and deploys the **Enterprise - SSH, sudo, certificates - Final inference stack deployment -**Required Parameters** +**Change permission to your file** + +```bash +chmod +x deploy-enterprise-inference.sh +``` +**Required Parameters to run the script** + ```bash sudo ./deploy-enterprise-inference.sh \ -u user \ From 6f4e1d94b975262f58480237eed672be7988b9c2 Mon Sep 17 00:00:00 2001 From: alexsin368 Date: Mon, 2 Feb 2026 16:16:50 -0800 Subject: [PATCH 10/35] add sample_solutions folder Signed-off-by: alexsin368 Signed-off-by: Harika --- sample_solutions/README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 sample_solutions/README.md diff --git a/sample_solutions/README.md b/sample_solutions/README.md new file mode 100644 index 00000000..43d98118 --- /dev/null +++ b/sample_solutions/README.md @@ -0,0 +1 @@ +# PLACEHOLDER \ No newline at end of file From d4c43f956e3419dc954428d223b4a3c391c2ae77 Mon Sep 17 00:00:00 2001 From: Harika Date: Thu, 5 Feb 2026 10:50:38 -0600 Subject: [PATCH 11/35] update keycloack values Signed-off-by: Harika --- .../Dell/ubuntu-22.04/EI/single-node/troubleshooting.md | 6 +++--- .../Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md | 6 +++--- .../Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/troubleshooting.md b/third_party/Dell/ubuntu-22.04/EI/single-node/troubleshooting.md index 90bb4bd7..f4d22f3d 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/troubleshooting.md +++ 
b/third_party/Dell/ubuntu-22.04/EI/single-node/troubleshooting.md @@ -70,9 +70,9 @@ Before re-running deployment, verify and update your inference-config.cfg: cluster_url=api.example.com cert_file=~/certs/cert.pem key_file=~/certs/key.pem -keycloak_client_id=api -keycloak_admin_user=api-admin -keycloak_admin_password=changeme!! +keycloak_client_id=my-client-id # <-- Replace with your Keycloak client ID +keycloak_admin_user=your-keycloak-admin-user # <-- Replace with your keycloak admin username +keycloak_admin_password=changeme # <-- Replace with your keycloak admin password vault_pass_code=place-holder-123 deploy_kubernetes_fresh=on deploy_ingress_controller=on diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md index d13b84df..81f327cf 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md @@ -172,9 +172,9 @@ Sample default values (insert your token) for a full deployment of the inference cluster_url=api.example.com # <-- Replace with your own FQDN cert_file=~/certs/cert.pem key_file=~/certs/key.pem -keycloak_client_id=api -keycloak_admin_user=api-admin -keycloak_admin_password=changeme!! 
+keycloak_client_id=my-client-id     # <-- Replace with Keycloak client ID
+keycloak_admin_user=your-keycloak-admin-user  # <-- Replace with your keycloak admin username
+keycloak_admin_password=changeme  # <-- Replace with your keycloak admin password
 hugging_face_token=your_hugging_face_token
 hugging_face_token_falcon3=your_hugging_face_token
 models=
diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md
index 3fe7beac..c04b2a9c 100644
--- a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md
+++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md
@@ -161,9 +161,9 @@ vi core/inventory/inference-config.cfg
 cluster_url=api.example.com   # <-- Replace with your own FQDN
 cert_file=~/certs/cert.pem
 key_file=~/certs/key.pem
-keycloak_client_id=api
-keycloak_admin_user=api-admin
-keycloak_admin_password=changeme!!
+keycloak_client_id=my-client-id     # <-- Replace with Keycloak client ID
+keycloak_admin_user=your-keycloak-admin-user  # <-- Replace with your keycloak admin username
+keycloak_admin_password=changeme  # <-- Replace with your keycloak admin password
 hugging_face_token=your_hugging_face_token
 hugging_face_token_falcon3=your_hugging_face_token
 models=

From 4622e9de903af4f34340cf2c75c9626027d71a3d Mon Sep 17 00:00:00 2001
From: Harika
Date: Thu, 5 Feb 2026 13:26:57 -0600
Subject: [PATCH 12/35] update cluster url

Signed-off-by: Harika

---
 .../Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh b/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh
index 51471097..a800ccbc 100644
--- a/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh
+++ b/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh
@@ -689,7 +689,7 @@ main() {
         cd certs
         if [[ ! -f "cert.pem" ]] || [[ ! 
-f "key.pem" ]]; then - su "${USERNAME}" -c "openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -days 365 -nodes -subj '/CN=api.example.com'" + su "${USERNAME}" -c "openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -days 365 -nodes -subj '/CN=${CLUSTER_URL}'" log_success "SSL certificates created" else log_info "SSL certificates already exist" From a6421bd52ea3befca679da337d8332e90c3c16c2 Mon Sep 17 00:00:00 2001 From: Harika Date: Thu, 5 Feb 2026 16:09:42 -0600 Subject: [PATCH 13/35] update readme Signed-off-by: Harika --- third_party/Dell/ubuntu-22.04/iac/README.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/iac/README.md b/third_party/Dell/ubuntu-22.04/iac/README.md index 25d925f1..19794fcf 100644 --- a/third_party/Dell/ubuntu-22.04/iac/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/README.md @@ -76,9 +76,19 @@ It will prompt for the user inputs during the installation, provide your inputs ## 3.Post-OS Enterprise Inference Deployment -Once OS is installed, login to your machine and run the post-os installations below. +Once OS is installed, Download the deploy-enterprise-inference.sh script to your machine using either wget or curl. -**Script:** [iac/deploy-enterprise-inference.sh](./deploy-enterprise-inference.sh) +```bash +wget -O deploy-enterprise-inference.sh \ +https://raw.githubusercontent.com/cld2labs/Enterprise-Inference/refs/heads/cld2labs/ubuntu22.04-deployment-scripts/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh +``` +or + +```bash +curl -fsSL \ +https://raw.githubusercontent.com/cld2labs/Enterprise-Inference/refs/heads/cld2labs/ubuntu22.04-deployment-scripts/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh \ +-o deploy-enterprise-inference.sh +``` This script performs **all post-OS configuration** and deploys the **Enterprise Inference stack** on a **single node**. 
From 2155268193163f88fa0cccdddb4cf46619385b0a Mon Sep 17 00:00:00 2001 From: Harika Date: Thu, 5 Feb 2026 16:11:06 -0600 Subject: [PATCH 14/35] update readme Signed-off-by: Harika --- third_party/Dell/ubuntu-22.04/iac/README.md | 24 ++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/third_party/Dell/ubuntu-22.04/iac/README.md b/third_party/Dell/ubuntu-22.04/iac/README.md index 19794fcf..16f80cea 100644 --- a/third_party/Dell/ubuntu-22.04/iac/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/README.md @@ -46,6 +46,25 @@ Key Notes - Power reset is forced using redfish_power - Boot mode (UEFI/Legacy) is not configurable on 17G servers +**Terraform Installation (Client Machine)** + +Terraform is executed from a client machine (such as your laptop or a jump host), not from the target server or iDRAC. + +Install Terraform on the machine where you will run the Terraform commands. + +Download Terraform: +https://developer.hashicorp.com/terraform/install + +Choose the package for your operating system and follow the installation instructions. + +Verify Installation +```bash +terraform version +``` +Terraform should return a version without errors. + +If Terraform is not found, ensure the installation directory is added to your system PATH. + **Terraform Variables** The following variables must be explicitly provided in 'terraform.tfvars' for the Ubuntu installer boot workflow to function correctly. @@ -117,7 +136,8 @@ sudo ./deploy-enterprise-inference.sh \ -p Linux123! 
\ -t hf_xxxxxxxxxxxxx \ -g gaudi3 \ --m "1" +-m "1" \ +-a "replace-with-your-dns" ``` | Option | Description | @@ -131,6 +151,7 @@ sudo ./deploy-enterprise-inference.sh \ | -r | Resume from last checkpoint | | -d | keycloak or genai, by default set to keycloak | | -o | off or on, by default observability set to off | +| -a | Cluster URL/ FQDN | **Resume After Failure** @@ -142,6 +163,7 @@ sudo ./deploy-enterprise-inference.sh \ -t hf_XXXXXXXXXXXX \ -g gaudi3 \ -m "1" \ +-a "replace-with-your-dns" -r ``` From 79187cc44bc75f0987e8fb771fdf9d10155f36de Mon Sep 17 00:00:00 2001 From: Harika Date: Thu, 5 Feb 2026 16:11:39 -0600 Subject: [PATCH 15/35] update readme Signed-off-by: Harika --- third_party/Dell/ubuntu-22.04/EI/single-node/troubleshooting.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/troubleshooting.md b/third_party/Dell/ubuntu-22.04/EI/single-node/troubleshooting.md index f4d22f3d..ba8c6207 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/troubleshooting.md +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/troubleshooting.md @@ -67,7 +67,7 @@ Deployment fails due to incorrect or missing configuration values. 
**Fix:** Before re-running deployment, verify and update your inference-config.cfg: ```bash -cluster_url=api.example.com +cluster_url=api.example.com # <-- Replace with cluster url cert_file=~/certs/cert.pem key_file=~/certs/key.pem keycloak_client_id=my-client-id # <-- Replace with your Keycloak client ID From 134f5e85fccc62ba7e0992b924912f2c79e43f7c Mon Sep 17 00:00:00 2001 From: Harika Date: Thu, 5 Feb 2026 16:56:56 -0600 Subject: [PATCH 16/35] update apisix for token generation step Signed-off-by: Harika --- .../EI/single-node/user-guide-apisix.md | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md index 81f327cf..fa316162 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md @@ -252,24 +252,26 @@ kubectl get apisixroutes ### 5. Test the Inference -**Environment Setup** +**Obtain Access Token** + +Before generating the access token, ensure all Keycloak-related values are correctly set in the `Enterprise-Inference/core/scripts/generate-token.sh` and these values must match with keycloak values in `Enterprise-Inference/core/inventory/inference-config.cfg` . + ```bash -export CLUSTER_URL=api.example.com -export BASE_URL=https://api.example.com -export KEYCLOAK_REALM=master -export KEYCLOAK_CLIENT_ID=api -export KEYCLOAK_CLIENT_SECRET=$(bash scripts/keycloak-fetch-client-secret.sh api.example.com api-admin 'changeme!!' 
api | awk -F': ' '/Client secret:/ {print $2}') +cd Enterprise-Inference/core/scripts +chmod +x generate-token.sh +./generate-token.sh ``` -**Obtain Access Token** +**Verify the Token** + +After the script completes successfully, confirm that the token is available in your shell: ```bash -export TOKEN=$(curl -k -X POST $BASE_URL/token \ - -H 'Content-Type: application/x-www-form-urlencoded' \ - -d "grant_type=client_credentials&client_id=${KEYCLOAK_CLIENT_ID}&client_secret=${KEYCLOAK_CLIENT_SECRET}" \ - | jq -r .access_token) +echo $TOKEN ``` +If a valid token is returned (long JWT string), the environment is ready for inference testing. + **Run a test query for Gaudi:** ```bash curl -k ${BASE_URL}/Llama-3.1-8B-Instruct/v1/completions \ From 3a71cf49b06ad0e166de95aef1c45fefab0dd56e Mon Sep 17 00:00:00 2001 From: Harika Date: Thu, 5 Feb 2026 17:33:45 -0600 Subject: [PATCH 17/35] update apisix for token generation step Signed-off-by: Harika --- .../EI/single-node/user-guide-apisix.md | 25 ++++++++++--------- .../EI/single-node/user-guide-genai.md | 18 ++++++------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md index fa316162..20459ad7 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md @@ -155,26 +155,26 @@ git checkout ${RELEASE} ### 2. Configure the Setup Files and Environment **Update inference-config.cfg:** -Update configuration files for single node deployment. Note that changes to the users and passwords in this file will impact the verification step settings in section 4 below. 
- -* Production installations should set your own values -* Add your Hugging Face token -* Set the cpu_or_gpu value to "cpu" for Xeon models and "gaudi3" for Intel Gaudi 3 accelerator models -* Set deploy_genai_gateway to on, and make deploy_keycloak_apisix to off ```bash vi core/inventory/inference-config.cfg ``` -Sample default values (insert your token) for a full deployment of the inference stack with Llama-8B model. +> **Note:** Update configuration files for single node deployment. Note that changes to the users and passwords in this file will impact the verification step settings in section 4 below. +* Replace cluster_url with your DNS , it must match with DNS used in certs generation. +* Set keycloak `keycloak_client_id` `keycloak_admin_user` `keycloak_admin_password` values +* Add your Hugging Face token +* Set the cpu_or_gpu value to "cpu" for Xeon models and "gaudi3" for Intel Gaudi 3 accelerator models +* Set deploy_keycloak_apisix to on and Set deploy_genai_gateway to off + ``` -cluster_url=api.example.com # <-- Replace with your own FQDN +cluster_url=api.example.com cert_file=~/certs/cert.pem key_file=~/certs/key.pem -keycloak_client_id=my-client-id # <-- Replace with Keycloak client ID -keycloak_admin_user=your-keycloak-admin-user # <-- Replace with your keycloack admin username -keycloak_admin_password=changeme # <-- Replace with your keycloack admin password +keycloak_client_id=my-client-id +keycloak_admin_user=your-keycloak-admin-user +keycloak_admin_password=changeme hugging_face_token=your_hugging_face_token hugging_face_token_falcon3=your_hugging_face_token models= @@ -190,7 +190,6 @@ deploy_ceph=off deploy_istio=off uninstall_ceph=off ``` -> **Note:** Replace cluster_url with your DNS , it must match with DNS used in certs generation. 
To support non-interactive execution of inference-stack-deploy.sh, create a file named "core/inentory/.become-passfile" with your user's sudo password: @@ -273,6 +272,8 @@ echo $TOKEN If a valid token is returned (long JWT string), the environment is ready for inference testing. **Run a test query for Gaudi:** +> Note: Replace ${BASE_URL} with your DNS + ```bash curl -k ${BASE_URL}/Llama-3.1-8B-Instruct/v1/completions \ -X POST \ diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md index c04b2a9c..d61d7d85 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md @@ -148,22 +148,23 @@ git checkout ${RELEASE} **Update inference-config.cfg** -* Production installations should set your own values -* Add your Hugging Face token -* Set the cpu_or_gpu value to "cpu" for Xeon models and "gaudi3" for Intel Gaudi 3 accelerator models -* Set deploy_genai_gateway to on, and make deploy_keycloak_apisix to off - ```bash vi core/inventory/inference-config.cfg ``` +> **Note:** Update configuration files for single node deployment. Note that changes to the users and passwords in this file will impact the verification step settings in section 4 below. +* Replace cluster_url with your DNS , it must match with DNS used in certs generation. 
+* Set keycloak `keycloak_client_id` `keycloak_admin_user` `keycloak_admin_password` values +* Add your Hugging Face token +* Set the cpu_or_gpu value to "cpu" for Xeon models and "gaudi3" for Intel Gaudi 3 accelerator models +* Set deploy_genai_gateway to on, and deploy_keycloak_apisix to off ``` cluster_url=api.example.com # <-- Replace with your own FQDN cert_file=~/certs/cert.pem key_file=~/certs/key.pem -keycloak_client_id=my-client-id # <-- Replace with Keycloak client ID -keycloak_admin_user=your-keycloak-admin-user # <-- Replace with your keycloack admin username -keycloak_admin_password=changeme # <-- Replace with your keycloack admin password +keycloak_client_id=my-client-id +keycloak_admin_user=your-keycloak-admin-user +keycloak_admin_password=changeme hugging_face_token=your_hugging_face_token hugging_face_token_falcon3=your_hugging_face_token models= @@ -179,7 +180,6 @@ deploy_ceph=off deploy_istio=off uninstall_ceph=off ``` -> **Note:** Replace cluster_url with your DNS , it must match with DNS used in certs generation. 
**Update hosts.yaml File** From 9f3e1c67e06a99c5f88dc28d06dbf9a77f5d229f Mon Sep 17 00:00:00 2001 From: Harika Date: Thu, 5 Feb 2026 17:44:11 -0600 Subject: [PATCH 18/35] update apisix for token generation step Signed-off-by: Harika --- .../ubuntu-22.04/EI/single-node/user-guide-apisix.md | 12 ++++++------ .../ubuntu-22.04/EI/single-node/user-guide-genai.md | 11 +++++------ third_party/Dell/ubuntu-22.04/iac/README.md | 4 ++-- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md index 20459ad7..41ccdad1 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md @@ -160,12 +160,12 @@ git checkout ${RELEASE} vi core/inventory/inference-config.cfg ``` -> **Note:** Update configuration files for single node deployment. Note that changes to the users and passwords in this file will impact the verification step settings in section 4 below. -* Replace cluster_url with your DNS , it must match with DNS used in certs generation. -* Set keycloak `keycloak_client_id` `keycloak_admin_user` `keycloak_admin_password` values -* Add your Hugging Face token -* Set the cpu_or_gpu value to "cpu" for Xeon models and "gaudi3" for Intel Gaudi 3 accelerator models -* Set deploy_keycloak_apisix to on and Set deploy_genai_gateway to off +> **Note:** Update configuration files for single node apisix deployment, Below are the changes needed. +> * Replace cluster_url with your DNS , it must match with DNS used in certs generation. 
+> * Set keycloak `keycloak_client_id` `keycloak_admin_user` `keycloak_admin_password` values +> * Add your Hugging Face token +> * Set the cpu_or_gpu value to "cpu" for Xeon models and "gaudi3" for Intel Gaudi 3 accelerator models +> * Set deploy_keycloak_apisix to on and Set deploy_genai_gateway to off ``` diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md index d61d7d85..c7bdb3bd 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md @@ -151,12 +151,11 @@ git checkout ${RELEASE} ```bash vi core/inventory/inference-config.cfg ``` -> **Note:** Update configuration files for single node deployment. Note that changes to the users and passwords in this file will impact the verification step settings in section 4 below. -* Replace cluster_url with your DNS , it must match with DNS used in certs generation. -* Set keycloak `keycloak_client_id` `keycloak_admin_user` `keycloak_admin_password` values -* Add your Hugging Face token -* Set the cpu_or_gpu value to "cpu" for Xeon models and "gaudi3" for Intel Gaudi 3 accelerator models -* Set deploy_genai_gateway to on, and deploy_keycloak_apisix to off +> **Note:** Update configuration files for single node genai deployment, Below are the changes needed. +> * Replace cluster_url with your DNS , it must match with DNS used in certs generation. 
+> * Add your Hugging Face token +> * Set the cpu_or_gpu value to "cpu" for Xeon models and "gaudi3" for Intel Gaudi 3 accelerator models +> * Set deploy_genai_gateway to on, and deploy_keycloak_apisix to off ``` cluster_url=api.example.com # <-- Replace with your own FQDN diff --git a/third_party/Dell/ubuntu-22.04/iac/README.md b/third_party/Dell/ubuntu-22.04/iac/README.md index 16f80cea..dbadd0a6 100644 --- a/third_party/Dell/ubuntu-22.04/iac/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/README.md @@ -52,12 +52,12 @@ Terraform is executed from a client machine (such as your laptop or a jump host) Install Terraform on the machine where you will run the Terraform commands. -Download Terraform: +**Download Terraform:** https://developer.hashicorp.com/terraform/install Choose the package for your operating system and follow the installation instructions. -Verify Installation +**Verify Installation** ```bash terraform version ``` From e9ace9763bfc3618117b8798b5aac153e28347b4 Mon Sep 17 00:00:00 2001 From: Harika Date: Thu, 5 Feb 2026 17:48:57 -0600 Subject: [PATCH 19/35] update apisix for token generation step Signed-off-by: Harika --- third_party/Dell/ubuntu-22.04/iac/README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/iac/README.md b/third_party/Dell/ubuntu-22.04/iac/README.md index dbadd0a6..74f45e73 100644 --- a/third_party/Dell/ubuntu-22.04/iac/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/README.md @@ -48,7 +48,7 @@ Key Notes **Terraform Installation (Client Machine)** -Terraform is executed from a client machine (such as your laptop or a jump host), not from the target server or iDRAC. +> **Note:** Terraform is executed from a client machine (such as your laptop or a jump host), not from the target server or iDRAC. Install Terraform on the machine where you will run the Terraform commands. 
@@ -61,9 +61,7 @@ Choose the package for your operating system and follow the installation instruc ```bash terraform version ``` -Terraform should return a version without errors. - -If Terraform is not found, ensure the installation directory is added to your system PATH. +Terraform should return a version without errors. If Terraform is not found, ensure the installation directory is added to your system PATH. **Terraform Variables** From c673d0829470636a41284ed19de382311ce6ea3e Mon Sep 17 00:00:00 2001 From: Harika Date: Thu, 5 Feb 2026 17:59:05 -0600 Subject: [PATCH 20/35] update script link Signed-off-by: Harika --- third_party/Dell/ubuntu-22.04/iac/README.md | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/iac/README.md b/third_party/Dell/ubuntu-22.04/iac/README.md index 74f45e73..27f0923b 100644 --- a/third_party/Dell/ubuntu-22.04/iac/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/README.md @@ -95,17 +95,7 @@ It will prompt for the user inputs during the installation, provide your inputs Once OS is installed, Download the deploy-enterprise-inference.sh script to your machine using either wget or curl. -```bash -wget -O deploy-enterprise-inference.sh \ -https://raw.githubusercontent.com/cld2labs/Enterprise-Inference/refs/heads/cld2labs/ubuntu22.04-deployment-scripts/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh -``` -or - -```bash -curl -fsSL \ -https://raw.githubusercontent.com/cld2labs/Enterprise-Inference/refs/heads/cld2labs/ubuntu22.04-deployment-scripts/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh \ --o deploy-enterprise-inference.sh -``` +**Script:** [iac/deploy-enterprise-inference.sh](./deploy-enterprise-inference.sh) This script performs **all post-OS configuration** and deploys the **Enterprise Inference stack** on a **single node**. 
From 793a2c924d43bed58d85f7c822a021c3c2f7fc00 Mon Sep 17 00:00:00 2001 From: Harika Date: Thu, 5 Feb 2026 18:01:00 -0600 Subject: [PATCH 21/35] update script link Signed-off-by: Harika --- third_party/Dell/ubuntu-22.04/iac/README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/iac/README.md b/third_party/Dell/ubuntu-22.04/iac/README.md index 27f0923b..5b850f60 100644 --- a/third_party/Dell/ubuntu-22.04/iac/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/README.md @@ -125,7 +125,6 @@ sudo ./deploy-enterprise-inference.sh \ -t hf_xxxxxxxxxxxxx \ -g gaudi3 \ -m "1" \ --a "replace-with-your-dns" ``` | Option | Description | @@ -139,7 +138,6 @@ sudo ./deploy-enterprise-inference.sh \ | -r | Resume from last checkpoint | | -d | keycloak or genai, by default set to keycloak | | -o | off or on, by default observability set to off | -| -a | Cluster URL/ FQDN | **Resume After Failure** @@ -151,7 +149,6 @@ sudo ./deploy-enterprise-inference.sh \ -t hf_XXXXXXXXXXXX \ -g gaudi3 \ -m "1" \ --a "replace-with-your-dns" -r ``` From 3c7f5e05a0871b02e4a38192ec2b257e042c135a Mon Sep 17 00:00:00 2001 From: Harika Date: Mon, 9 Feb 2026 16:27:25 -0600 Subject: [PATCH 22/35] update deployment script Signed-off-by: Harika --- .../EI/single-node/user-guide-apisix.md | 32 +++---- .../iac/deploy-enterprise-inference.sh | 89 +++++++++++-------- 2 files changed, 61 insertions(+), 60 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md index 41ccdad1..3fe2f791 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md @@ -8,11 +8,10 @@ - [3. DNS and SSL/TLS Setup](#3-dns-and-ssltls-setup) - [4. Hugging Face Token Setup](#4-hugging-face-token-setup) - [Single Node Deployment Guide](#single-node-deployment-guide) - - [1. 
Clone the Repository](#1-clone-the-repository) - - [2. Configure the Setup Files and Environment](#2-configure-the-setup-files-and-environment) - - [3. Run the Deployment](#3-run-the-deployment) - - [4. Verify the Deployment](#4-verify-the-deployment) - - [5. Test the Inference](#5-test-the-inference) + - [1. Configure the Setup Files and Environment](#2-configure-the-setup-files-and-environment) + - [2. Run the Deployment](#3-run-the-deployment) + - [3. Verify the Deployment](#4-verify-the-deployment) + - [4. Test the Inference](#5-test-the-inference) - [Troubleshooting](#troubleshooting) - [Summary](#summary) @@ -140,24 +139,15 @@ This section explains how to deploy Intel® AI for Enterprise Inference on a sin --- -### 1. Clone the Repository -```bash -cd ~ -git clone https://github.com/opea-project/Enterprise-Inference.git -cd Enterprise-Inference -git checkout ${RELEASE} -``` -> **Note:** Update the RELEASE environment variable to point to the desired Enterprise Inference version(for example: release-1.4.0) - ---- - -### 2. Configure the Setup Files and Environment +### 1. Configure the Setup Files and Environment **Update inference-config.cfg:** +Clone the repository, If repo is not downloaded on target machine. + ```bash -vi core/inventory/inference-config.cfg +vi Enterprise-Inference/core/inventory/inference-config.cfg ``` > **Note:** Update configuration files for single node apisix deployment, Below are the changes needed. @@ -206,7 +196,7 @@ cp -f docs/examples/single-node/hosts.yaml core/inventory/hosts.yaml > Note: The ansible_user field is set to ubuntu by default. Change it to the actual username used. -### 3. Run the Deployment +### 2. Run the Deployment > **Note:** > The `--models` argument selects a model using its **numeric ID** @@ -231,7 +221,7 @@ chmod +x inference-stack-deploy.sh When prompted, choose option **1) Provision Enterprise Inference Cluster** and confirm **Yes** to start installation. 
If using Intel® Gaudi® hardware, make sure firmware and drivers are updated before running this script. -### 4. Verify the Deployment +### 3. Verify the Deployment Verify Pods Status ```bash @@ -249,7 +239,7 @@ kubectl get apisixroutes --- -### 5. Test the Inference +### 4. Test the Inference **Obtain Access Token** diff --git a/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh b/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh index a800ccbc..6f8352cf 100644 --- a/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh +++ b/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh @@ -20,6 +20,7 @@ # -o, --observability Enable observability: 'on' or 'off' (default: off) # -r, --resume Resume from checkpoint (auto-detected if state file exists) # -s, --state-file State file path (default: /tmp/ei-deploy.state) +# -a, --api-fqdn API FQDN for hosts and cert (default: api.example.com) # -h, --help Show this help message # # Example: @@ -34,7 +35,7 @@ USERNAME="Replace-with-your-username" HF_TOKEN="Replace-with-your-hugging face token" USER_PASSWORD="Replace-with-your-user-password" GPU_TYPE="Enter gaudi3/cpu based on your deployment" -MODELS="Enter Model number" +MODELS="" DEPLOYMENT_MODE="keycloak" DEPLOY_OBSERVABILITY="off" KEYCLOAK_CLIENT_ID="api" @@ -44,6 +45,7 @@ FIRMWARE_VERSION="1.22.1" STATE_FILE="/tmp/ei-deploy.state" BRANCH="release-1.4.0" REPO_URL="https://github.com/opea-project/Enterprise-Inference" +API_FQDN="api.example.com" RESUME=false ACTION="deploy" @@ -95,6 +97,10 @@ log_error() { echo -e "${RED}[ERROR]${NC} $1" >&2 } +escape_sed_replacement() { + printf '%s' "$1" | sed -e 's/[&|\\]/\\&/g' +} + # Hugging Face token checks for selected model numbers check_hf_token_access() { log_info "Validating Hugging Face token..." 
@@ -162,16 +168,29 @@ check_hf_token_access() { update_inference_config() { if [[ -f "$CONFIG_FILE" ]]; then + local hf_token_escaped models_escaped gpu_type_escaped + local keycloak_client_id_escaped keycloak_admin_user_escaped keycloak_admin_password_escaped + local deploy_keycloak_apisix_escaped deploy_genai_gateway_escaped deploy_observability_escaped + + hf_token_escaped=$(escape_sed_replacement "${HF_TOKEN}") + gpu_type_escaped=$(escape_sed_replacement "${GPU_TYPE}") + keycloak_client_id_escaped=$(escape_sed_replacement "${KEYCLOAK_CLIENT_ID}") + keycloak_admin_user_escaped=$(escape_sed_replacement "${KEYCLOAK_ADMIN_USER}") + keycloak_admin_password_escaped=$(escape_sed_replacement "${KEYCLOAK_ADMIN_PASSWORD}") + deploy_keycloak_apisix_escaped=$(escape_sed_replacement "${DEPLOY_KEYCLOAK_APISIX}") + deploy_genai_gateway_escaped=$(escape_sed_replacement "${DEPLOY_GENAI_GATEWAY}") + deploy_observability_escaped=$(escape_sed_replacement "${DEPLOY_OBSERVABILITY}") + sed -i -E \ - -e 's/^[[:space:]]*hugging_face_token[[:space:]]*=.*/hugging_face_token='${HF_TOKEN}'/' \ - -e 's/^[[:space:]]*models[[:space:]]*=.*/models='${MODELS}'/' \ - -e 's/^[[:space:]]*cpu_or_gpu[[:space:]]*=.*/cpu_or_gpu='${GPU_TYPE}'/' \ - -e 's/^[[:space:]]*keycloak_client_id[[:space:]]*=.*/keycloak_client_id='${KEYCLOAK_CLIENT_ID}'/' \ - -e 's/^[[:space:]]*keycloak_admin_user[[:space:]]*=.*/keycloak_admin_user='${KEYCLOAK_ADMIN_USER}'/' \ - -e 's/^[[:space:]]*keycloak_admin_password[[:space:]]*=.*/keycloak_admin_password='${KEYCLOAK_ADMIN_PASSWORD}'/' \ - -e 's/^[[:space:]]*deploy_keycloak_apisix[[:space:]]*=.*/deploy_keycloak_apisix='${DEPLOY_KEYCLOAK_APISIX}'/' \ - -e 's/^[[:space:]]*deploy_genai_gateway[[:space:]]*=.*/deploy_genai_gateway='${DEPLOY_GENAI_GATEWAY}'/' \ - -e 's/^[[:space:]]*deploy_observability[[:space:]]*=.*/deploy_observability='${DEPLOY_OBSERVABILITY}'/' \ + -e "s|^[[:space:]]*hugging_face_token[[:space:]]*=.*|hugging_face_token=${hf_token_escaped}|" \ + -e 
"s|^[[:space:]]*models[[:space:]]*=.*|models=|" \ + -e "s|^[[:space:]]*cpu_or_gpu[[:space:]]*=.*|cpu_or_gpu=${gpu_type_escaped}|" \ + -e "s|^[[:space:]]*keycloak_client_id[[:space:]]*=.*|keycloak_client_id=${keycloak_client_id_escaped}|" \ + -e "s|^[[:space:]]*keycloak_admin_user[[:space:]]*=.*|keycloak_admin_user=${keycloak_admin_user_escaped}|" \ + -e "s|^[[:space:]]*keycloak_admin_password[[:space:]]*=.*|keycloak_admin_password=${keycloak_admin_password_escaped}|" \ + -e "s|^[[:space:]]*deploy_keycloak_apisix[[:space:]]*=.*|deploy_keycloak_apisix=${deploy_keycloak_apisix_escaped}|" \ + -e "s|^[[:space:]]*deploy_genai_gateway[[:space:]]*=.*|deploy_genai_gateway=${deploy_genai_gateway_escaped}|" \ + -e "s|^[[:space:]]*deploy_observability[[:space:]]*=.*|deploy_observability=${deploy_observability_escaped}|" \ "$CONFIG_FILE" log_info "Updated inference-config.cfg with models='${MODELS}' and cpu_or_gpu=${GPU_TYPE}" else @@ -195,13 +214,14 @@ Required Options (uninstall): Optional Options: -p, --password User sudo password for Ansible (default: Linux123!) 
-g, --gpu-type GPU type: 'gaudi3' or 'cpu' (default: gaudi3) - -m, --models Model IDs to deploy, comma-separated (default: "1") + -m, --models Model IDs to deploy, comma-separated (default: empty) -b, --branch Git branch to clone (default: dell-deploy) -f, --firmware-version Firmware version (default: 1.22.1) -d, --deployment-mode Deployment mode: 'keycloak' or 'genai' (default: keycloak) -o, --observability Enable observability: 'on' or 'off' (default: off) -s, --state-file State file path (default: /tmp/ei-deploy.state) -r, --resume Force resume from checkpoint + -a, --api-fqdn API FQDN for hosts and cert (default: api.example.com) -h, --help Show this help message Notes: @@ -262,6 +282,10 @@ while [[ $# -gt 0 ]]; do STATE_FILE="$2" shift 2 ;; + -a|--api-fqdn) + API_FQDN="$2" + shift 2 + ;; -r|--resume) RESUME=true shift @@ -543,22 +567,7 @@ main() { fi if [[ "$GPU_TYPE" == "cpu" ]] || [[ "$GPU_TYPE" == "gaudi3" ]]; then CONFIG_FILE="/home/${USERNAME}/Enterprise-Inference/core/inventory/inference-config.cfg" - if [[ -f "$CONFIG_FILE" ]]; then - sed -i -E \ - -e 's/^[[:space:]]*hugging_face_token[[:space:]]*=.*/hugging_face_token='${HF_TOKEN}'/' \ - -e 's/^[[:space:]]*models[[:space:]]*=.*/models='${MODELS}'/' \ - -e 's/^[[:space:]]*cpu_or_gpu[[:space:]]*=.*/cpu_or_gpu='${GPU_TYPE}'/' \ - -e 's/^[[:space:]]*keycloak_client_id[[:space:]]*=.*/keycloak_client_id='${KEYCLOAK_CLIENT_ID}'/' \ - -e 's/^[[:space:]]*keycloak_admin_user[[:space:]]*=.*/keycloak_admin_user='${KEYCLOAK_ADMIN_USER}'/' \ - -e 's/^[[:space:]]*keycloak_admin_password[[:space:]]*=.*/keycloak_admin_password='${KEYCLOAK_ADMIN_PASSWORD}'/' \ - -e 's/^[[:space:]]*deploy_keycloak_apisix[[:space:]]*=.*/deploy_keycloak_apisix='${DEPLOY_KEYCLOAK_APISIX}'/' \ - -e 's/^[[:space:]]*deploy_genai_gateway[[:space:]]*=.*/deploy_genai_gateway='${DEPLOY_GENAI_GATEWAY}'/' \ - -e 's/^[[:space:]]*deploy_observability[[:space:]]*=.*/deploy_observability='${DEPLOY_OBSERVABILITY}'/' \ - "$CONFIG_FILE" - log_info "Updated 
inference-config.cfg with models='${MODELS}' and cpu_or_gpu=${GPU_TYPE}" - else - log_warn "inference-config.cfg not found at $CONFIG_FILE, skipping update." - fi + update_inference_config fi # ------------------------------------------------------------ @@ -567,11 +576,14 @@ main() { if [[ "$GPU_TYPE" == "cpu" ]]; then log_info "CPU-only mode detected — disabling NRI and CPU balloons" + # Normalize file: always end with newline + sed -i -e '$a\' "$CONFIG_FILE" + # Update if keys exist sed -i -E \ - -e 's/^[[:space:]]*enable_nri[[:space:]]*=.*/enable_nri=false/' \ - -e 's/^[[:space:]]*enable_cpu_balloons[[:space:]]*=.*/enable_cpu_balloons=false/' \ - "$CONFIG_FILE" || true + -e 's/^[[:space:]]*enable_nri[[:space:]]*=.*/enable_nri=false/' \ + -e 's/^[[:space:]]*enable_cpu_balloons[[:space:]]*=.*/enable_cpu_balloons=false/' \ + "$CONFIG_FILE" || true # Append if keys do not exist grep -q '^enable_nri=' "$CONFIG_FILE" || echo 'enable_nri=false' >> "$CONFIG_FILE" @@ -631,9 +643,9 @@ main() { # Step 5: Add hostname to /etc/hosts if ! skip_if_completed "hosts_file"; then log_info "Step 5: Adding hostname to /etc/hosts..." - if ! grep -q "api.example.com" /etc/hosts; then + if ! grep -q "${API_FQDN}" /etc/hosts; then echo "" >> /etc/hosts - echo "127.0.0.1 api.example.com" >> /etc/hosts + echo "127.0.0.1 ${API_FQDN}" >> /etc/hosts log_success "Hostname added to /etc/hosts" else log_info "Hostname already in /etc/hosts" @@ -689,7 +701,7 @@ main() { cd certs if [[ ! -f "cert.pem" ]] || [[ ! 
-f "key.pem" ]]; then - su "${USERNAME}" -c "openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -days 365 -nodes -subj '/CN=${CLUSTER_URL}'" + su "${USERNAME}" -c "openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -days 365 -nodes -subj '/CN=${API_FQDN}'" log_success "SSL certificates created" else log_info "SSL certificates already exist" @@ -776,7 +788,7 @@ main() { log_info "Parameters: --models '${MODELS}' --cpu-or-gpu '${GPU_TYPE}' --hugging-face-token " # Run the deployment script - if [[ "$FORCE_INTERACTIVE_DEPLOY" == true ]]; then + if [[ "$FORCE_INTERACTIVE_DEPLOY" == true || -z "${MODELS}" ]]; then log_info "State file indicates a prior deployment; running interactively" CONFIG_FILE="/home/${USERNAME}/Enterprise-Inference/core/inventory/inference-config.cfg" update_inference_config @@ -787,7 +799,7 @@ main() { } else # Using echo to provide input: "1" for "Provision Enterprise Inference Cluster", "yes" for confirmation - su "${USERNAME}" -c "cd /home/${USERNAME}/Enterprise-Inference/core && echo -e '1\nyes' | bash ./inference-stack-deploy.sh --models '${MODELS}' --cpu-or-gpu '${GPU_TYPE}' --hugging-face-token ${HUGGINGFACE_TOKEN}" || { + su "${USERNAME}" -c "cd /home/${USERNAME}/Enterprise-Inference/core && echo -e '1\n${MODELS}\nyes' | bash ./inference-stack-deploy.sh --models '${MODELS}' --cpu-or-gpu '${GPU_TYPE}' --hugging-face-token ${HUGGINGFACE_TOKEN}" || { log_error "Enterprise Inference Stack deployment failed!" log_warn "You can resume by running this script again with -r flag" exit 1 @@ -798,12 +810,11 @@ main() { save_state "deploy_stack" fi - # Cleanup state file on successful completion + # Keep state file on successful completion for future resumes if [[ -f "$STATE_FILE" ]]; then - rm -f "$STATE_FILE" - log_info "State file cleaned up" + log_info "State file retained for future resumes" fi - + log_success "==========================================" log_success "Deployment completed successfully!" 
log_success "==========================================" From bb6dbd7b834dcc518d3fac9afe3f3b0e7343492b Mon Sep 17 00:00:00 2001 From: Harika Date: Mon, 9 Feb 2026 16:54:20 -0600 Subject: [PATCH 23/35] update deployment script Signed-off-by: Harika --- third_party/Dell/ubuntu-22.04/iac/README.md | 3 +++ .../Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/iac/README.md b/third_party/Dell/ubuntu-22.04/iac/README.md index 5b850f60..ac7629e6 100644 --- a/third_party/Dell/ubuntu-22.04/iac/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/README.md @@ -124,6 +124,7 @@ sudo ./deploy-enterprise-inference.sh \ -p Linux123! \ -t hf_xxxxxxxxxxxxx \ -g gaudi3 \ +-a cluster-url \ -m "1" \ ``` @@ -135,6 +136,7 @@ sudo ./deploy-enterprise-inference.sh \ | -g | gaudi3 or cpu | | -m | Model IDs | | -b | Repo branch (default: release-1.4.0) | +| -a | cluster -url | | -r | Resume from last checkpoint | | -d | keycloak or genai, by default set to keycloak | | -o | off or on, by default observability set to off | @@ -148,6 +150,7 @@ sudo ./deploy-enterprise-inference.sh \ -p Linux123! \ -t hf_XXXXXXXXXXXX \ -g gaudi3 \ +-a cluster-url \ -m "1" \ -r ``` diff --git a/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh b/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh index 6f8352cf..ed47da50 100644 --- a/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh +++ b/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh @@ -38,9 +38,9 @@ GPU_TYPE="Enter gaudi3/cpu based on your deployment" MODELS="" DEPLOYMENT_MODE="keycloak" DEPLOY_OBSERVABILITY="off" -KEYCLOAK_CLIENT_ID="api" -KEYCLOAK_ADMIN_USER="api-admin" -KEYCLOAK_ADMIN_PASSWORD="changeme!!" 
+KEYCLOAK_CLIENT_ID="my-client-id" +KEYCLOAK_ADMIN_USER="your-keycloak-admin-user" +KEYCLOAK_ADMIN_PASSWORD="changeme" FIRMWARE_VERSION="1.22.1" STATE_FILE="/tmp/ei-deploy.state" BRANCH="release-1.4.0" From e7dcb8c1d6442e4fb95fafc14327127a9a88fffa Mon Sep 17 00:00:00 2001 From: Harika Date: Wed, 11 Feb 2026 12:51:05 -0600 Subject: [PATCH 24/35] Adding model-deployment guide Signed-off-by: Harika --- .../EI/single-node/troubleshooting.md | 2 +- .../EI/single-node/user-guide-apisix.md | 15 +- third_party/Dell/ubuntu-22.04/iac/README.md | 48 +++- .../iac/deploy-enterprise-inference.sh | 10 +- .../Dell/ubuntu-22.04/iac/model-deployment.md | 218 ++++++++++++++++++ 5 files changed, 279 insertions(+), 14 deletions(-) create mode 100644 third_party/Dell/ubuntu-22.04/iac/model-deployment.md diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/troubleshooting.md b/third_party/Dell/ubuntu-22.04/EI/single-node/troubleshooting.md index ba8c6207..a4cfe0c9 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/troubleshooting.md +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/troubleshooting.md @@ -65,7 +65,7 @@ Two options: Deployment fails due to incorrect or missing configuration values. **Fix:** -Before re-running deployment, verify and update your inference-config.cfg: +Before re-running deployment, verify and update your inference-config.cfg. These values must match your actual deployment environment. ```bash cluster_url=api.example.com # <-- Replace with cluster url cert_file=~/certs/cert.pem diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md index 3fe2f791..723eb1cd 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md @@ -8,10 +8,10 @@ - [3. DNS and SSL/TLS Setup](#3-dns-and-ssltls-setup) - [4. 
Hugging Face Token Setup](#4-hugging-face-token-setup) - [Single Node Deployment Guide](#single-node-deployment-guide) - - [1. Configure the Setup Files and Environment](#2-configure-the-setup-files-and-environment) - - [2. Run the Deployment](#3-run-the-deployment) - - [3. Verify the Deployment](#4-verify-the-deployment) - - [4. Test the Inference](#5-test-the-inference) + - [1. Configure the Setup Files and Environment](#1-configure-the-setup-files-and-environment) + - [2. Run the Deployment](#2-run-the-deployment) + - [3. Verify the Deployment](#3-verify-the-deployment) + - [4. Test the Inference](#4-test-the-inference) - [Troubleshooting](#troubleshooting) - [Summary](#summary) @@ -199,7 +199,7 @@ cp -f docs/examples/single-node/hosts.yaml core/inventory/hosts.yaml ### 2. Run the Deployment > **Note:** -> The `--models` argument selects a model using its **numeric ID** +> The '--models' argument allows you to specify one or more models by their numeric ID. [full list of available model IDs](../../iac/README.md#pre-integrated-models-list) > If `--models` is omitted, the installer displays the full model list and prompts you to select a model interactively. Run the setup for Gaudi @@ -248,7 +248,7 @@ Before generating the access token, ensure all Keycloak-related values are corre ```bash cd Enterprise-Inference/core/scripts chmod +x generate-token.sh -./generate-token.sh +. 
generate-token.sh ``` **Verify the Token** @@ -256,6 +256,7 @@ chmod +x generate-token.sh After the script completes successfully, confirm that the token is available in your shell: ```bash +echo $BASE_URL echo $TOKEN ``` @@ -302,4 +303,4 @@ This document provides common deployment and runtime issues observed during Inte - Configured SSH, DNS, and SSL - Generated your Hugging Face token - Deployed Intel® AI for Enterprise Inference -- Tested a working model endpoint +- Tested a working model endpoint \ No newline at end of file diff --git a/third_party/Dell/ubuntu-22.04/iac/README.md b/third_party/Dell/ubuntu-22.04/iac/README.md index ac7629e6..8fd512fb 100644 --- a/third_party/Dell/ubuntu-22.04/iac/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/README.md @@ -134,7 +134,7 @@ sudo ./deploy-enterprise-inference.sh \ | -p | OS userpassword | | -t | Hugging Face token | | -g | gaudi3 or cpu | -| -m | Model IDs | +| -m | Choose model ID from [Pre-Integrated Models List](#pre-integrated-models-list) , based on your deployment type (gaudi or cpu) | | -b | Repo branch (default: release-1.4.0) | | -a | cluster -url | | -r | Resume from last checkpoint | @@ -269,6 +269,52 @@ if EI is deployed with apisix, follow [Testing EI model with apisix](../EI/singl if EI is deployed with genai, follow [Testing EI model with genai](../EI/single-node/user-guide-genai.md#5-test-the-inference) for generating api-key and testing the inference --- +## Additional Information + +### Pre-Integrated Models List + +Enterprise Inference provides a set of pre-integrated and validated models optimized for performance and stability. These models can be deployed directly using the Enterprise Inference catalog. 
+ +**Pre-Integrated Gaudi Models** + +**Model ID** | **Model** | +----------------|:------------------------------------------:| +1 | meta-llama/Llama-3.1-8B-Instruct | +2 | meta-llama/Llama-3.1-70B-Instruct | +3 | meta-llama/Llama-3.1-405B-Instruct | +4 | meta-llama/Llama-3.3-70B-Instruct | +5 | meta-llama/Llama-4-Scout-17B-16E-Instruct | +6 | Qwen/Qwen2.5-32B-Instruct | +7 | deepseek-ai/DeepSeek-R1-Distill-Qwen-32B | +8 | deepseek-ai/DeepSeek-R1-Distill-Llama-8B | +9 | mistralai/Mixtral-8x7B-Instruct-v0.1 | +10 | mistralai/Mistral-7B-Instruct-v0.3 | +11 | BAAI/bge-base-en-v1.5 | +12 | BAAI/bge-reranker-base | +13 | codellama/CodeLlama-34b-Instruct-hf | +14 | tiiuae/Falcon3-7B-Instruct | + +**Pre-Integrated CPU Models** + + **Model ID** | **Model** | +----------------|:------------------------------------------:| +21 | meta-llama/Llama-3.1-8B-Instruct | +22 | meta-llama/Llama-3.2-3B-Instruct | +23 | deepseek-ai/DeepSeek-R1-Distill-Llama-8B | +24 | deepseek-ai/DeepSeek-R1-Distill-Qwen-32B | +25 | Qwen/Qwen3-1.7B | +26 | Qwen/Qwen3-4B-Instruct-2507 | + + +### Model Deployment + +If an Enterprise Inference cluster is already deployed, you can use the interactive deployment script to manage models, including: + + - Deploying additional models from the Enterprise Inference model catalog + - Deploying custom models directly from Hugging Face + - Undeploying existing models from the cluster + +Refer to the [Model Deployment guide](./model-deployment.md) and run the interactive inference-stack-deploy.sh script to perform these operations. 
## Summary diff --git a/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh b/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh index ed47da50..40e52d7e 100644 --- a/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh +++ b/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh @@ -38,9 +38,9 @@ GPU_TYPE="Enter gaudi3/cpu based on your deployment" MODELS="" DEPLOYMENT_MODE="keycloak" DEPLOY_OBSERVABILITY="off" -KEYCLOAK_CLIENT_ID="my-client-id" -KEYCLOAK_ADMIN_USER="your-keycloak-admin-user" -KEYCLOAK_ADMIN_PASSWORD="changeme" +KEYCLOAK_CLIENT_ID="api" +KEYCLOAK_ADMIN_USER="api-admin" +KEYCLOAK_ADMIN_PASSWORD="changeme!!" FIRMWARE_VERSION="1.22.1" STATE_FILE="/tmp/ei-deploy.state" BRANCH="release-1.4.0" @@ -792,14 +792,14 @@ main() { log_info "State file indicates a prior deployment; running interactively" CONFIG_FILE="/home/${USERNAME}/Enterprise-Inference/core/inventory/inference-config.cfg" update_inference_config - su "${USERNAME}" -c "cd /home/${USERNAME}/Enterprise-Inference/core && bash ./inference-stack-deploy.sh --cpu-or-gpu '${GPU_TYPE}' --hugging-face-token ${HUGGINGFACE_TOKEN}" || { + sudo -u "${USERNAME}" -H bash -c "cd /home/${USERNAME}/Enterprise-Inference/core && bash ./inference-stack-deploy.sh --cpu-or-gpu '${GPU_TYPE}' --hugging-face-token ${HUGGINGFACE_TOKEN}" || { log_error "Enterprise Inference Stack deployment failed!" 
log_warn "You can resume by running this script again with -r flag" exit 1 } else # Using echo to provide input: "1" for "Provision Enterprise Inference Cluster", "yes" for confirmation - su "${USERNAME}" -c "cd /home/${USERNAME}/Enterprise-Inference/core && echo -e '1\n${MODELS}\nyes' | bash ./inference-stack-deploy.sh --models '${MODELS}' --cpu-or-gpu '${GPU_TYPE}' --hugging-face-token ${HUGGINGFACE_TOKEN}" || { + sudo -u "${USERNAME}" -H bash -c "cd /home/${USERNAME}/Enterprise-Inference/core && echo -e '1\n${MODELS}\nyes' | bash ./inference-stack-deploy.sh --models '${MODELS}' --cpu-or-gpu '${GPU_TYPE}' --hugging-face-token ${HUGGINGFACE_TOKEN}" || { log_error "Enterprise Inference Stack deployment failed!" log_warn "You can resume by running this script again with -r flag" exit 1 diff --git a/third_party/Dell/ubuntu-22.04/iac/model-deployment.md b/third_party/Dell/ubuntu-22.04/iac/model-deployment.md new file mode 100644 index 00000000..00f26ca3 --- /dev/null +++ b/third_party/Dell/ubuntu-22.04/iac/model-deployment.md @@ -0,0 +1,218 @@ +# Intel® AI for Enterprise Inference — Model Deployment User Guide + +## Table of Contents + +1. [Overview](#1-overview) +2. [Environment Prerequisites](#2-environment-prerequisites) +3. [Model Deployment Workflow](#3-model-deployment-workflow) + - [Deploy Models from Enterprise Inference Catalog](#31-deploy-models-from-enterprise-inference-catalog) + - [Deploy Models Directly from Hugging Face](#32-deploy-models-directly-from-hugging-face) +4. [Undeploy Models](#4-undeploy-models) + - [Undeploy Models from Enterprise Inference Catalog](#41-undeploy-models-from-enterprise-inference-catalog) + - [Undeploy Models Deployed from Hugging Face](#42-undeploy-models-deployed-from-hugging-face) + +## 1. Overview + +This guide outlines the standard procedure for deploying models on an Enterprise Inference cluster using the `inference-stack-deploy.sh` script. + +--- + +## 2. 
Environment Prerequisites + +- **Host System:** Control plane or master node with access to the inference stack +- **Cluster Access:** Existing or newly provisioned Kubernetes cluster +- **Certificates:** Valid cluster certificate (`cert.pem`) and private key (`key.pem`) +- **Hugging Face Token:** Required for downloading models from Hugging Face +- **Script Path:** `~/Enterprise-Inference/core/inference-stack-deploy.sh` + +--- + +## 3. Model Deployment Workflow + +1. Deploy from pre-integrated Enterprise Inference model catalog +2. Deploy directly from Hugging Face + +Both use the same interactive script and menu flow. + +### 3.1 Deploy Models from Enterprise Inference Catalog + +This method deploys pre-integrated and validated models optimized for Enterprise Inference. + +**Step 1: Run the Deployment Script** + +```bash +bash ~/Enterprise-Inference/core/inference-stack-deploy.sh +``` + +**Step 2: Navigate Through the Menus** + +Choose the following options from the menu: + +**3** – Update Deployed Inference Cluster + +**2** – Manage LLM Models + +**1** – Deploy Model + +**Step 3: Select Model to Deploy** + +The script displays a list of available models and their corresponding numeric IDs based on the selected deployment type (CPU or Gaudi). + +When prompted to `Enter numbers of models to deploy/remove (comma-separated)`, enter the model ID you want to deploy (example: `1`). + +**Step 4: Confirm Deployment** + +When prompted to `Do you wish to continue? (y/n)`, type **y** to proceed. + +**Once confirmed:** +- The model is deployed automatically to the inference cluster. +- All required Kubernetes Pods, Services, and Endpoints are created. + +**Test:** + +Run the following command to verify that the model pod is in the `Running` state. +```bash +kubectl get pods +``` +--- + +### 3.2 Deploy Models Directly from Hugging Face + +This option allows deploying any Hugging Face model, including models not pre-validated by Enterprise Inference. 
+
+**Step 1: Run the Deployment Script**
+```bash
+bash ~/Enterprise-Inference/core/inference-stack-deploy.sh
+```
+**Step 2: Navigate Through the Menus**
+
+Choose the following options from the menu:
+
+**3** – Update Deployed Inference Cluster
+
+**2** – Manage LLM Models
+
+**4** – Deploy Model from Hugging Face
+
+**Step 3: Provide Hugging Face Model Details**
+
+When prompted to `Enter the Hugging Face Model ID`, enter the desired Hugging Face model ID (example: `mistralai/Mistral-7B-v0.3`).
+
+> Note: The model (mistralai/Mistral-7B-v0.3) above is only an example. You can enter any compatible Hugging Face model (CPU or Gaudi), depending on your deployment type.
+
+**Step 4: Provide Deployment name for the model**
+
+When prompted to `Enter Deployment Name for the Model`, enter a deployment name for the model (example: `mistral-7b-v0-3`).
+
+> **Naming rules:**
+> - Lowercase letters only
+> - Numbers and hyphens allowed
+> - No spaces or special characters
+> - Must follow Kubernetes naming conventions
+
+**Step 5: Provide Tensor Parallel Size (Gaudi Only)**
+
+Set the tensor parallel size based on available Gaudi cards.
+
+> **Note:** This option deploys a model that has not been pre-validated. Ensure the tensor parallel size is configured correctly. An incorrect value may cause the model to remain in a "Not Ready" state.
+
+**Step 6: Confirm Deployment**
+
+When prompted to `Do you wish to continue? (y/n)`, type **y** to proceed.
+
+**Test**
+
+Run the following command to verify that the model pod is in the `Running` state:
+
+```bash
+kubectl get pods
+```
+---
+
+## 4. Undeploy Models
+
+Enterprise Inference allows you to safely undeploy models that were deployed either from:
+- The Enterprise Inference model catalog
+- Directly from Hugging Face
+
+### 4.1 Undeploy Models from Enterprise Inference Catalog
+
+This method is used to undeploy models that were deployed from the pre-integrated and validated Enterprise Inference catalog. 
+
+**Step 1: Run the Deployment Script**
+```bash
+bash ~/Enterprise-Inference/core/inference-stack-deploy.sh
+```
+**Step 2: Navigate Through the Menus**
+
+Choose the following options from the menu:
+
+**3** – Update Deployed Inference Cluster
+
+**2** – Manage LLM Models
+
+**2** – Undeploy Model
+
+**Step 3: Select Model to Remove**
+
+The script displays a list of available models with their model IDs based on the deployment type (CPU or Gaudi).
+
+When prompted to `Enter numbers of models to deploy/remove (comma-separated)`, enter the model ID you want to remove (example: `1`).
+
+**Step 4: Confirm Model Removal**
+
+When prompted to `Do you wish to continue? (y/n)`, type **y** to proceed.
+> CAUTION: Removing the Inference LLM Model will also remove its associated services and resources, which may cause service downtime and potential data loss.
+
+**Once confirmed:**
+ - The model deployment is deleted
+ - All associated Kubernetes resources are removed
+
+**Test**
+
+Run the command below to confirm that the model pod has been deleted.
+```bash
+kubectl get pods
+```
+---
+
+### 4.2 Undeploy Models Deployed from Hugging Face
+
+To remove models deployed via **Deploy Model from Hugging Face**:
+
+**Step 1: Run the Script**
+```bash
+bash ~/Enterprise-Inference/core/inference-stack-deploy.sh
+```
+**Step 2: Navigate Through the Menus**
+
+Choose the following options from the menu:
+
+**3** – Update Deployed Inference Cluster
+
+**2** – Manage LLM Models
+
+**5** – Remove Model using deployment name
+
+**Step 3: Provide Deployment Name**
+
+When prompted to `Enter Deployment Name for the Model`, provide a deployment name (example: `mistral-7b-v0-3`).
+
+> The deployment name must exactly match the name used during model deployment.
+
+**Step 4: Confirm Removal**
+
+When prompted to `Do you wish to continue? (y/n)`, type **y** to proceed. 
+> CAUTION: Removing the Inference LLM Model will also remove its associated services and resources, which may cause service downtime and potential data loss. + +**Once confirmed:** + - The model deployment is deleted + - All associated Kubernetes resources are removed + +**Test** + +Run below command to confirm, if the model pod is deleted. +```bash +kubectl get pods +``` + \ No newline at end of file From 40584c509533254ee9f3d9d916407e68cf0a6e7c Mon Sep 17 00:00:00 2001 From: Harika Date: Wed, 11 Feb 2026 12:57:05 -0600 Subject: [PATCH 25/35] Adding model-deployment guide Signed-off-by: Harika --- .../EI/single-node/user-guide-genai.md | 28 +++++++------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md index c7bdb3bd..12fdd7a2 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md @@ -8,11 +8,10 @@ - [3. DNS and SSL/TLS Setup](#3-dns-and-ssltls-setup) - [4. Hugging Face Token Setup](#4-hugging-face-token-setup) - [Single Node Deployment Guide](#single-node-deployment-guide) - - [1. Clone the Repository](#1-clone-the-repository) - - [2. Configure the Setup Files and Environment](#2-configure-the-setup-files-and-environment) - - [3. Run the Deployment](#3-run-the-deployment) - - [4. Verify the Deployment](#4-verify-the-deployment) - - [5. Test the Inference](#5-test-the-inference) + - [1. Configure the Setup Files and Environment](#1-configure-the-setup-files-and-environment) + - [2. Run the Deployment](#2-run-the-deployment) + - [3. Verify the Deployment](#3-verify-the-deployment) + - [4. 
Test the Inference](#4-test-the-inference) - [Summary](#summary) --- @@ -135,21 +134,14 @@ SSH keys are required to allow **Ansible** or automation scripts to connect secu ## Single Node Deployment Guide This section explains how to deploy Intel® AI for Enterprise Inference on a single Ubuntu 22.04 server. -### 1. Clone the Repository +### 1. Configure the Setup Files and Environment -```bash -git clone https://github.com/opea-project/Enterprise-Inference.git -cd Enterprise-Inference -git checkout ${RELEASE} -``` -> **Note:** Update the RELEASE environment variable to point to the desired Enterprise Inference version(for example: release-1.4.0) - -### 2. Configure the Setup Files and Environment +Clone the repository, If repo is not downloaded on target machine. **Update inference-config.cfg** ```bash -vi core/inventory/inference-config.cfg +vi Enterprise-Inference/core/inventory/inference-config.cfg ``` > **Note:** Update configuration files for single node genai deployment, Below are the changes needed. > * Replace cluster_url with your DNS , it must match with DNS used in certs generation. @@ -189,7 +181,7 @@ cp -f docs/examples/single-node/hosts.yaml core/inventory/hosts.yaml > Note: The ansible_user field is set to ubuntu by default. Change it to the actual username used. -### 3. Run the Deployment +### 2. Run the Deployment > **Note:** > The `--models` argument selects a model using its **numeric ID** @@ -214,7 +206,7 @@ When prompted, choose option **1) Provision Enterprise Inference Cluster** and c If using Intel® Gaudi® hardware, make sure firmware and drivers are updated before running this script. -### 4. Verify the Deployment +### 3. Verify the Deployment Verify Pods Status ```bash kubectl get pods -A @@ -224,7 +216,7 @@ Expected States: - No CrashLoopBackOff - No Pending pods -### 5. Test the Inference +### 4. 
Test the Inference ```bash export BASE_URL=https://api.example.com From 9cff6bfe6b7b545fa361f5fe6a0c1644c919ff2f Mon Sep 17 00:00:00 2001 From: Harika Date: Thu, 12 Feb 2026 18:28:09 -0600 Subject: [PATCH 26/35] adding custom iso scripts and model deployment guides Signed-off-by: Harika --- third_party/Dell/README.md | 1 - third_party/Dell/ubuntu-22.04/iac/README.md | 98 +++++--- .../Dell/ubuntu-22.04/iac/iso/README.md | 110 +++++++++ .../Dell/ubuntu-22.04/iac/iso/custom-iso.sh | 215 ++++++++++++++++++ .../Dell/ubuntu-22.04/iac/model-deployment.md | 31 ++- .../Dell/ubuntu-22.04/iac/mount-iso.sh | 7 +- 6 files changed, 422 insertions(+), 40 deletions(-) delete mode 100644 third_party/Dell/README.md create mode 100644 third_party/Dell/ubuntu-22.04/iac/iso/README.md create mode 100644 third_party/Dell/ubuntu-22.04/iac/iso/custom-iso.sh diff --git a/third_party/Dell/README.md b/third_party/Dell/README.md deleted file mode 100644 index 43d98118..00000000 --- a/third_party/Dell/README.md +++ /dev/null @@ -1 +0,0 @@ -# PLACEHOLDER \ No newline at end of file diff --git a/third_party/Dell/ubuntu-22.04/iac/README.md b/third_party/Dell/ubuntu-22.04/iac/README.md index 8fd512fb..f7d39240 100644 --- a/third_party/Dell/ubuntu-22.04/iac/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/README.md @@ -8,11 +8,38 @@ The solution cleanly separates: - Boot orchestration (Terraform) - Post-OS configuration and inference deployment -It is designed for repeatable, resumable, and operator-friendly deployments. +--- + +## 0. Create Custom Ubuntu ISO (Optional but Recommended) + +**Script:** [custom-iso.sh](./iso/custom-iso.sh) + +Before mounting the Ubuntu ISO to iDRAC, you may optionally generate a custom Ubuntu 22.04.5 ISO designed for fully unattended installation. + +This enables a zero-touch, fully automated OS installation, eliminating manual input during setup and ensuring consistent, repeatable provisioning. 
+ +**Host Your Custom ISO (Recommended for Automation)** + +After generating the ISO: + +1. Upload it to a web-accessible location (e.g., Firebase Hosting, internal web server, object storage, or any HTTP/HTTPS file server). + +2. Ensure the ISO is reachable via a public or internally accessible HTTP/HTTPS URL. + +3. Save this URL — it will be required in the next step when mounting the ISO using iDRAC Redfish Virtual Media. + +Example: +```bash +https://your-domain.com/ubuntu-22.04.5-custom.iso +``` + +If you already have a prebuilt ISO hosted and accessible via HTTP/HTTPS, you may skip this step and proceed to Step 1: Mount Ubuntu ISO. + +For detailed instructions on building a custom ISO, refer to: [ISO Creation Guide](./iso/README.md) --- -### 1. Mount Ubuntu ISO (iDRAC Redfish) +## 1. Mount Ubuntu ISO (iDRAC Redfish) **Script:** [iac/mount-iso.sh](./mount-iso.sh) @@ -27,6 +54,19 @@ export IDRAC_IP=100.67.x.x export IDRAC_USER=root export IDRAC_PASS=calvin ``` +**Specify Custom ISO URL** + +If you created and hosted a custom ISO in above step, pass its HTTP/HTTPS URL using: +```bash +export ISO_URL=https://your-domain.com/ubuntu-22.04.5-custom.iso +``` + +This should be the URL you generated and hosted in the previous step. + +You may also use any internally hosted ISO that is reachable by iDRAC. + +> Note: If ISO_URL is not provided, the script will automatically use the default Ubuntu 22.04 Live Server ISO. it will Launch the standard installer and Prompt for manual user input during OS installation. + **Mount ISO** ```bash chmod +x mount-iso.sh @@ -40,15 +80,9 @@ chmod +x mount-iso.sh Terraform uses the **Dell Redfish provider** to configure a **one-time boot from Virtual Media (CD)** and **force a reboot**. 
-Key Notes -- ISO must already be mounted using mount-iso.sh -- Boot override is set to Once -- Power reset is forced using redfish_power -- Boot mode (UEFI/Legacy) is not configurable on 17G servers - **Terraform Installation (Client Machine)** -> **Note:** Terraform is executed from a client machine (such as your laptop or a jump host), not from the target server or iDRAC. +Terraform is executed from a client machine (such as your laptop or a jump host), not from the target server or iDRAC. Install Terraform on the machine where you will run the Terraform commands. @@ -99,18 +133,6 @@ Once OS is installed, Download the deploy-enterprise-inference.sh script to your This script performs **all post-OS configuration** and deploys the **Enterprise Inference stack** on a **single node**. -**Key Features** -- Resume / checkpoint support -- Safe to re-run after failure -- CPU or Gaudi3 support -- Automated configuration of: - - Packages - - Repo clone + branch checkout - - Inventory & config files - - Firmware & kernel tuning (Gaudi3) - - SSH, sudo, certificates - - Final inference stack deployment - **Change permission to your file** ```bash @@ -127,19 +149,24 @@ sudo ./deploy-enterprise-inference.sh \ -a cluster-url \ -m "1" \ ``` - -| Option | Description | -| -------| ------------ | -| -u | OS username | -| -p | OS userpassword | -| -t | Hugging Face token | -| -g | gaudi3 or cpu | -| -m | Choose model ID from [Pre-Integrated Models List](#pre-integrated-models-list) , based on your deployment type (gaudi or cpu) | -| -b | Repo branch (default: release-1.4.0) | -| -a | cluster -url | -| -r | Resume from last checkpoint | -| -d | keycloak or genai, by default set to keycloak | -| -o | off or on, by default observability set to off | +### Options & Defaults + +| Option | Required | Default | Description | +|--------|----------|----------|-------------| +| `-u, --username` | Yes (deploy & uninstall) | (none) | Enterprise Inference owner username. 
Must match the invoking (sudo) user. | +| `-t, --token` | Yes (deploy only) | (none) | Hugging Face access token used to validate and download selected models. | +| `-p, --password` | No | `Linux123!` | User sudo password used for Ansible become operations. | +| `-g, --gpu-type` | No | `gaudi3` | Deployment target type: `gaudi3` or `cpu`. | +| `-m, --models` | No | `""` (interactive mode) | Choose model ID from [Pre-Integrated Models List](#pre-integrated-models-list) , based on your deployment type (gaudi or cpu) . If not provided, deployment runs interactively. | +| `-b, --branch` | No | `release-1.4.0` | Git branch of the Enterprise-Inference repository to clone. | +| `-f, --firmware-version` | No | `1.22.1` | Gaudi3 firmware version (applies only when `-g gaudi3`). | +| `-d, --deployment-mode` | No | `keycloak` | Deployment mode: `keycloak` (Keycloak + APISIX) or `genai` (GenAI Gateway). | +| `-o, --observability` | No | `off` | Enable observability components: `on` or `off`. | +| `-r, --resume` | No | Auto-detected | Resume deployment from last checkpoint if state file exists. | +| `-s, --state-file` | No | `/tmp/ei-deploy.state` | Custom path for deployment state tracking file. | +| `-a, --api-fqdn` | No | `api.example.com` | API Fully Qualified Domain Name used for `/etc/hosts` and TLS certificate generation. | +| `uninstall` | Yes (for uninstall action) | (none) | Removes deployed Enterprise Inference stack and cleans up state. 
| + **Resume After Failure** @@ -269,6 +296,7 @@ if EI is deployed with apisix, follow [Testing EI model with apisix](../EI/singl if EI is deployed with genai, follow [Testing EI model with genai](../EI/single-node/user-guide-genai.md#5-test-the-inference) for generating api-key and testing the inference --- + ## Additional Information ### Pre-Integrated Models List @@ -321,5 +349,3 @@ Refer to the [Model Deployment guide](./model-deployment.md) and run the interac This repository provides a clean, deterministic, enterprise-grade deployment pipeline for: Bare-metal Ubuntu + Enterprise Inference (CPU/Gaudi3) - - diff --git a/third_party/Dell/ubuntu-22.04/iac/iso/README.md b/third_party/Dell/ubuntu-22.04/iac/iso/README.md new file mode 100644 index 00000000..19448555 --- /dev/null +++ b/third_party/Dell/ubuntu-22.04/iac/iso/README.md @@ -0,0 +1,110 @@ +# Ubuntu Autoinstall ISO Builder + +This provides a single script (`custom-iso.sh`) that builds an Ubuntu Server autoinstall ISO. The script: + +- Generates `user-data` and `meta-data` on the fly from CLI options. +- Accepts a local ISO file path or downloads an ISO from a URL. +- Preserves boot settings from the input ISO so the output remains bootable. + +## Prerequisites + +### Linux +- `bash` +- `curl` +- `rsync` +- `xorriso` +- `sudo` +- `mount`/`umount` + +Example (Ubuntu/Debian): + +```bash +sudo apt-get update +sudo apt-get install -y curl rsync xorriso +``` + +### macOS +- `bash` (default) +- `curl` (default) +- `rsync` (Homebrew recommended) +- `xorriso` (Homebrew) + +Install on macOS: + +```bash +brew install xorriso rsync +``` + +## Usage + +The script requires `--hostname`, `--username`, and `--password-hash`. All other values have defaults. 
+ +Basic example using a local ISO: + +```bash +./custom-iso.sh \ + --hostname=ubuntu-server-001 \ + --username=user \ + --iso=./ubuntu-22.04.5-live-server-amd64.iso +``` + +Full example with all options: + +```bash +./custom-iso.sh \ + --hostname=ubuntu-server-001 \ + --username=user \ + --password-hash='$6$Sl0xydNgA3rBk1Uo$Pj7oVVI7smkdBh20V8EyLivWpKDHFueUhvrfwxundGp/DQrAuTHjIxnrCZIMVJ1zcTIJ7VgIWKu0mUZmiRsqv0' \ + --ssh-key='' \ + --timezone=America/Chicago \ + --locale=en_US.UTF-8 \ + --kbd-layout=us \ + --kbd-variant='' \ + --storage-layout=direct \ + --instance-id=ubuntu-server-001 \ + --packages='' \ + --iso=./ubuntu-22.04.5-live-server-amd64.iso \ + --iso-url=https://releases.ubuntu.com/jammy/ubuntu-22.04.5-live-server-amd64.iso \ + --iso-name=ubuntu-22.04.5-live-server-amd64.iso \ + --out-iso=ubuntu-22.04.5-autoinstall.iso \ + --volid=Ubuntu-Server-22.04.5-AI +``` + +Notes: +- If `--iso` is provided, `--iso-url` and `--iso-name` are ignored. +- If `--ssh-key` is empty, no SSH key is embedded. +- `--packages` is a comma-separated list (e.g., `--packages=openssh-server,curl`). +- `--volid` must be 32 characters or fewer. + +## Options and Defaults + +| Option | Required | Default | Description | +|---|---|---|---| +| `--hostname` | Yes | (none) | Hostname for the installed system. | +| `--username` | Yes | (none) | Primary user name. | +| `--password-hash` | Yes | (none) | SHA-512 password hash for the user. | +| `--ssh-key` | No | `""` | SSH public key (optional). | +| `--timezone` | No | `America/Chicago` | System timezone. | +| `--locale` | No | `en_US.UTF-8` | Locale. | +| `--kbd-layout` | No | `us` | Keyboard layout. | +| `--kbd-variant` | No | `""` | Keyboard variant (optional). | +| `--storage-layout` | No | `direct` | Storage layout. Use `direct` for whole disk or `lvm`. | +| `--instance-id` | No | `ubuntu-server-001` | Cloud-init instance ID. | +| `--packages` | No | `""` | Comma-separated package list. | +| `--iso` | No | `""` | Local ISO file path. 
| +| `--iso-url` | No | Ubuntu 22.04.5 URL | Download ISO from URL. | +| `--iso-name` | No | `ubuntu-22.04.5-live-server-amd64.iso` | Filename for downloaded ISO. | +| `--out-iso` | No | `ubuntu-22.04.5-autoinstall.iso` | Output ISO filename. | +| `--volid` | No | `Ubuntu-Server-22.04.5-AI` | ISO volume ID (max 32 chars). | + +## Output + +The script writes the rebuilt ISO to the path specified by `--out-iso` (default `ubuntu-22.04.5-autoinstall.iso`). + +## UTM Test Notes (macOS) + +If testing in UTM and you see a `Shell>` prompt: +- On Apple Silicon, you must use **Emulate → x86_64** for this amd64 ISO. +- In the UEFI shell, type `exit`, then select the CD/DVD boot entry. + +If the installer logs repeatedly mention networking updates, verify UTM’s network mode is set to **Shared** or **Bridged**. diff --git a/third_party/Dell/ubuntu-22.04/iac/iso/custom-iso.sh b/third_party/Dell/ubuntu-22.04/iac/iso/custom-iso.sh new file mode 100644 index 00000000..4b148f19 --- /dev/null +++ b/third_party/Dell/ubuntu-22.04/iac/iso/custom-iso.sh @@ -0,0 +1,215 @@ +#!/usr/bin/env bash +set -euo pipefail + +ISO_URL="https://releases.ubuntu.com/jammy/ubuntu-22.04.5-live-server-amd64.iso" +ISO_NAME="ubuntu-22.04.5-live-server-amd64.iso" +ISO_PATH="" +OUT_ISO="ubuntu-22.04.5-autoinstall.iso" +VOLID="Ubuntu-Server-22.04.5-AI" + +HOSTNAME="" +USERNAME="" +PASSWORD_HASH='$6$Sl0xydNgA3rBk1Uo$Pj7oVVI7smkdBh20V8EyLivWpKDHFueUhvrfwxundGp/DQrAuTHjIxnrCZIMVJ1zcTIJ7VgIWKu0mUZmiRsqv0' +SSH_PUBLIC_KEY="" +TIMEZONE="America/Chicago" +LOCALE="en_US.UTF-8" +KBD_LAYOUT="us" +KBD_VARIANT="" +STORAGE_LAYOUT="direct" +INSTANCE_ID="ubuntu-server-001" +PACKAGES="" + +usage() { + cat </dev/null 2>&1; then + echo "Missing required command: $cmd" + exit 1 + fi +} + +for arg in "$@"; do + case "$arg" in + --hostname=*) HOSTNAME="${arg#*=}" ;; + --username=*) USERNAME="${arg#*=}" ;; + --password-hash=*) PASSWORD_HASH="${arg#*=}" ;; + --ssh-key=*) SSH_PUBLIC_KEY="${arg#*=}" ;; + --timezone=*) 
TIMEZONE="${arg#*=}" ;; + --locale=*) LOCALE="${arg#*=}" ;; + --kbd-layout=*) KBD_LAYOUT="${arg#*=}" ;; + --kbd-variant=*) KBD_VARIANT="${arg#*=}" ;; + --storage-layout=*) STORAGE_LAYOUT="${arg#*=}" ;; + --instance-id=*) INSTANCE_ID="${arg#*=}" ;; + --packages=*) PACKAGES="${arg#*=}" ;; + --iso=*) ISO_PATH="${arg#*=}" ;; + --iso-url=*) ISO_URL="${arg#*=}" ;; + --iso-name=*) ISO_NAME="${arg#*=}" ;; + --out-iso=*) OUT_ISO="${arg#*=}" ;; + --volid=*) VOLID="${arg#*=}" ;; + --help|-h) usage; exit 0 ;; + *) echo "Unknown option: $arg"; usage; exit 1 ;; + esac +done + +if [[ -z "$HOSTNAME" || -z "$USERNAME" || -z "$PASSWORD_HASH" ]]; then + echo "Missing required: --hostname, --username, --password-hash" + usage + exit 1 +fi + +if (( ${#VOLID} > 32 )); then + echo "VOLID too long (max 32 chars): $VOLID" + exit 1 +fi + +check_cmd curl +check_cmd rsync +check_cmd xorriso + +if [[ "$OSTYPE" != "darwin"* ]]; then + check_cmd mount + check_cmd umount +fi + +WORKDIR="$(mktemp -d)" +MNTDIR="$WORKDIR/mnt" +EXTRACTDIR="$WORKDIR/extract" +USERDATA_FILE="$WORKDIR/user-data" +METADATA_FILE="$WORKDIR/meta-data" + +cleanup() { + if mount | grep -q "$MNTDIR"; then + if [[ "$OSTYPE" == "darwin"* ]]; then + hdiutil detach "$MNTDIR" || true + else + sudo umount "$MNTDIR" || true + fi + fi + if [[ -d "$EXTRACTDIR" ]]; then + chmod -R u+rwX "$EXTRACTDIR" >/dev/null 2>&1 || true + fi + rm -rf "$WORKDIR" +} +trap cleanup EXIT + +if [[ -n "$ISO_PATH" ]]; then + if [[ ! -f "$ISO_PATH" ]]; then + echo "ISO not found: $ISO_PATH" + exit 1 + fi +else + if [[ -z "$ISO_NAME" ]]; then + ISO_NAME="$(basename "$ISO_URL")" + fi + ISO_PATH="$ISO_NAME" + if [[ ! -f "$ISO_PATH" ]]; then + echo "Downloading ISO..." + curl -L -o "$ISO_PATH" "$ISO_URL" + fi +fi + +mkdir -p "$MNTDIR" "$EXTRACTDIR" + +echo "Generating user-data and meta-data..." 
+{ + echo "#cloud-config" + echo "autoinstall:" + echo " version: 1" + echo " identity:" + echo " hostname: $HOSTNAME" + echo " username: $USERNAME" + echo " password: $PASSWORD_HASH" + echo " locale: $LOCALE" + echo " keyboard:" + echo " layout: $KBD_LAYOUT" + if [[ -n "$KBD_VARIANT" ]]; then + echo " variant: $KBD_VARIANT" + fi + echo " timezone: $TIMEZONE" + echo " ssh:" + echo " install-server: true" + if [[ -n "$SSH_PUBLIC_KEY" ]]; then + echo " authorized-keys:" + echo " - $SSH_PUBLIC_KEY" + fi + echo " storage:" + echo " layout:" + echo " name: $STORAGE_LAYOUT" + if [[ -n "$PACKAGES" ]]; then + echo " packages:" + IFS=',' read -r -a PKGS <<< "$PACKAGES" + for p in "${PKGS[@]}"; do + echo " - $p" + done + fi + echo " user-data:" + echo " disable_root: true" + echo " late-commands:" + echo " - curtin in-target --target=/target -- echo \"Autoinstall complete\" > /target/var/log/autoinstall_complete" +} > "$USERDATA_FILE" + +{ + echo "instance-id: $INSTANCE_ID" + echo "local-hostname: $HOSTNAME" +} > "$METADATA_FILE" + +if [[ "$OSTYPE" == "darwin"* ]]; then + echo "Extracting ISO with xorriso..." + xorriso -osirrox on -indev "$ISO_PATH" -extract / "$EXTRACTDIR" + chmod -R u+rwX "$EXTRACTDIR" >/dev/null 2>&1 || true +else + echo "Mounting ISO..." + sudo mount -o loop "$ISO_PATH" "$MNTDIR" + echo "Copying ISO contents..." + rsync -a "$MNTDIR"/ "$EXTRACTDIR"/ + echo "Unmounting ISO..." + sudo umount "$MNTDIR" +fi + +echo "Adding user-data and meta-data..." +cp "$USERDATA_FILE" "$EXTRACTDIR"/user-data +cp "$METADATA_FILE" "$EXTRACTDIR"/meta-data + +echo "Patching boot configs..." +for f in \ + "$EXTRACTDIR/boot/grub/grub.cfg" \ + "$EXTRACTDIR/boot/grub/loopback.cfg" \ + "$EXTRACTDIR/isolinux/txt.cfg" +do + if [[ -f "$f" ]]; then + sed -i.bak 's/---/autoinstall ds=nocloud\\;s=\/cdrom\/ ---/g' "$f" + fi +done + +echo "Rebuilding ISO..." 
+BOOT_ARGS_RAW="$( + xorriso -indev "$ISO_PATH" -report_el_torito as_mkisofs \ + | tail -n +2 \ + | grep -v "^-V " \ + | grep -v "^--modification-date=" +)" +BOOT_ARGS="$(echo "$BOOT_ARGS_RAW" | tr '\n' ' ')" +eval "xorriso -as mkisofs $BOOT_ARGS -V \"$VOLID\" -o \"$OUT_ISO\" -J -l -r \"$EXTRACTDIR\"" + +echo "Done: $OUT_ISO" diff --git a/third_party/Dell/ubuntu-22.04/iac/model-deployment.md b/third_party/Dell/ubuntu-22.04/iac/model-deployment.md index 00f26ca3..bc6b332e 100644 --- a/third_party/Dell/ubuntu-22.04/iac/model-deployment.md +++ b/third_party/Dell/ubuntu-22.04/iac/model-deployment.md @@ -13,8 +13,17 @@ ## 1. Overview -This guide outlines the standard procedure for deploying models on an Enterprise Inference cluster using the `inference-stack-deploy.sh` script. +This guide outlines the procedure for deploying and managing models on an existing Enterprise Inference cluster +The Enterprise Inference cluster must already be provisioned and operational before proceeding with model deployment. If the cluster is not yet deployed, [follow the deployment guide](README.md#3post-os-enterprise-inference-deployment) + +**This document covers:** + +Deploying models from the Enterprise Inference catalog + +Deploying custom models directly from Hugging Face + +Undeploying models safely --- ## 2. Environment Prerequisites @@ -41,6 +50,8 @@ This method deploys pre-integrated and validated models optimized for Enterprise **Step 1: Run the Deployment Script** ```bash +sudo ./deploy-enterprise-inference.sh -u -p -t -g gaudi3 -a + or bash ~/Enterprise-Inference/core/inference-stack-deploy.sh ``` @@ -74,6 +85,12 @@ Run the following command to verify that the model pod is in the `Running` state ```bash kubectl get pods ``` +Test the model inference. 
+ +if EI is deployed with apisix, follow [Testing EI model with apisix](../EI/single-node/user-guide-apisix.md#5-test-the-inference) for generating token and testing the inference + +if EI is deployed with genai, follow [Testing EI model with genai](../EI/single-node/user-guide-genai.md#5-test-the-inference) for generating api-key and testing the inference + --- ### 3.2 Deploy Models Directly from Hugging Face @@ -82,6 +99,8 @@ This option allows deploying any Hugging Face model, including models not pre-va **Step 1: To deploy** ```bash +sudo ./deploy-enterprise-inference.sh -u -p -t -g gaudi3 -a + or bash ~/Enterprise-Inference/core/inference-stack-deploy.sh ``` **Step 2: Navigate Through the Menus** @@ -127,6 +146,12 @@ Run the following command to verify that the model pod is in the `Running` state ```bash kubectl get pods ``` +Test the model inference. + +if EI is deployed with apisix, follow [Testing EI model with apisix](../EI/single-node/user-guide-apisix.md#5-test-the-inference) for generating token and testing the inference + +if EI is deployed with genai, follow [Testing EI model with genai](../EI/single-node/user-guide-genai.md#5-test-the-inference) for generating api-key and testing the inference + --- ## 4. 
Undeploy Models @@ -141,6 +166,8 @@ This method is used for models deployed through pre-integrated and validated mod **Step 1: Run the Deployment Script** ```bash +sudo ./deploy-enterprise-inference.sh -u -p -t -g gaudi3 -a + or bash ~/Enterprise-Inference/core/inference-stack-deploy.sh ``` **Step 2: Navigate Through the Menus** @@ -182,6 +209,8 @@ To remove Models deployed via Deploy Model from Hugging Face **Step 1: Run the Script** ```bash +sudo ./deploy-enterprise-inference.sh -u -p -t -g gaudi3 -a + or bash ~/Enterprise-Inference/core/inference-stack-deploy.sh ``` **Step 2: Navigate Through the Menus** diff --git a/third_party/Dell/ubuntu-22.04/iac/mount-iso.sh b/third_party/Dell/ubuntu-22.04/iac/mount-iso.sh index b7ab76f8..574ff267 100644 --- a/third_party/Dell/ubuntu-22.04/iac/mount-iso.sh +++ b/third_party/Dell/ubuntu-22.04/iac/mount-iso.sh @@ -22,6 +22,8 @@ fi IDRAC_IP="${IDRAC_IP:-${IDRAC_HOST}}" IDRAC_USER="${IDRAC_USER:-${IDRAC_USERNAME}}" IDRAC_PASS="${IDRAC_PASS:-${IDRAC_PASSWORD}}" +DEFAULT_ISO_URL="https://releases.ubuntu.com/22.04/ubuntu-22.04.5-live-server-amd64.iso" +ISO_URL="${ISO_URL:-$DEFAULT_ISO_URL}" # Validate required environment variables if [ -z "$IDRAC_IP" ]; then @@ -42,7 +44,8 @@ if [ -z "$IDRAC_PASS" ]; then exit 1 fi -ISO_URL="https://releases.ubuntu.com/22.04/ubuntu-22.04.5-live-server-amd64.iso" + +#ISO_URL="https://releases.ubuntu.com/22.04/ubuntu-22.04.5-live-server-amd64.iso" SYSTEM_ID="System.Embedded.1" VIRTUAL_MEDIA_SLOT="1" @@ -185,4 +188,4 @@ else echo "❌ Failed to mount ISO. HTTP Code: $HTTP_CODE" echo "Response: $BODY" exit 1 -fi +fi \ No newline at end of file From 55da4ebc763e21189ab3ebd5a495e52713c52e46 Mon Sep 17 00:00:00 2001 From: Harika Date: Fri, 13 Feb 2026 15:12:30 -0600 Subject: [PATCH 27/35] Update README with clearer usage instructions Clarified usage instructions for required script variables and password hash format. 
Signed-off-by: Harika --- third_party/Dell/ubuntu-22.04/iac/iso/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/iac/iso/README.md b/third_party/Dell/ubuntu-22.04/iac/iso/README.md index 19448555..9193e963 100644 --- a/third_party/Dell/ubuntu-22.04/iac/iso/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/iso/README.md @@ -37,7 +37,7 @@ brew install xorriso rsync ## Usage -The script requires `--hostname`, `--username`, and `--password-hash`. All other values have defaults. +Set `--hostname`, `--username`, and `--password-hash` variables, these are need to run script. All other values have defaults. Basic example using a local ISO: @@ -45,7 +45,7 @@ Basic example using a local ISO: ./custom-iso.sh \ --hostname=ubuntu-server-001 \ --username=user \ - --iso=./ubuntu-22.04.5-live-server-amd64.iso + --password-hash='$6$Sl0xydNgA3rBk1Uo$Pj7oVVI7smkdBh20V8EyLivWpKDHFueUhvrfwxundGp/DQrAuTHjIxnrCZIMVJ1zcTIJ7VgIWKu0mUZmiRsqv0' \ ``` Full example with all options: @@ -82,7 +82,7 @@ Notes: |---|---|---|---| | `--hostname` | Yes | (none) | Hostname for the installed system. | | `--username` | Yes | (none) | Primary user name. | -| `--password-hash` | Yes | (none) | SHA-512 password hash for the user. | +| `--password-hash` | Yes | (none) | password should be in SHA-512 hash format. | | `--ssh-key` | No | `""` | SSH public key (optional). | | `--timezone` | No | `America/Chicago` | System timezone. | | `--locale` | No | `en_US.UTF-8` | Locale. | From 3f478cebe14235001a70adace4ea188b1f9826c9 Mon Sep 17 00:00:00 2001 From: Harika Date: Fri, 13 Feb 2026 15:14:39 -0600 Subject: [PATCH 28/35] Replace example values with placeholders in README Updated README with placeholders for hostname, username, and password hash. 
Signed-off-by: Harika --- third_party/Dell/ubuntu-22.04/iac/iso/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/iac/iso/README.md b/third_party/Dell/ubuntu-22.04/iac/iso/README.md index 9193e963..1d0a196d 100644 --- a/third_party/Dell/ubuntu-22.04/iac/iso/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/iso/README.md @@ -43,9 +43,9 @@ Basic example using a local ISO: ```bash ./custom-iso.sh \ - --hostname=ubuntu-server-001 \ - --username=user \ - --password-hash='$6$Sl0xydNgA3rBk1Uo$Pj7oVVI7smkdBh20V8EyLivWpKDHFueUhvrfwxundGp/DQrAuTHjIxnrCZIMVJ1zcTIJ7VgIWKu0mUZmiRsqv0' \ + --hostname= \ + --username= \ + --password-hash= \ ``` Full example with all options: From 410ad026c9e59ebfc43e927d325acf32c1948f9a Mon Sep 17 00:00:00 2001 From: Harika Date: Fri, 13 Feb 2026 15:21:58 -0600 Subject: [PATCH 29/35] update iso readme Signed-off-by: Harika --- third_party/Dell/ubuntu-22.04/iac/README.md | 63 ++++++++----------- .../Dell/ubuntu-22.04/iac/iso/README.md | 12 ++-- 2 files changed, 33 insertions(+), 42 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/iac/README.md b/third_party/Dell/ubuntu-22.04/iac/README.md index f7d39240..8154d3e8 100644 --- a/third_party/Dell/ubuntu-22.04/iac/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/README.md @@ -14,29 +14,23 @@ The solution cleanly separates: **Script:** [custom-iso.sh](./iso/custom-iso.sh) -Before mounting the Ubuntu ISO to iDRAC, you may optionally generate a custom Ubuntu 22.04.5 ISO designed for fully unattended installation. +> Note: If you already have a prebuilt ISO hosted and accessible via HTTP/HTTPS, you may skip this step and proceed to Step 1: Mount Ubuntu ISO. -This enables a zero-touch, fully automated OS installation, eliminating manual input during setup and ensuring consistent, repeatable provisioning. 
+Before mounting the Ubuntu ISO to iDRAC, generate a custom Ubuntu 22.04.5 ISO designed for zero-touch, fully automated OS installation, eliminating manual input during setup and ensuring consistent, repeatable provisioning. -**Host Your Custom ISO (Recommended for Automation)** - -After generating the ISO: - -1. Upload it to a web-accessible location (e.g., Firebase Hosting, internal web server, object storage, or any HTTP/HTTPS file server). +For detailed instructions on building a custom ISO, refer to: [ISO Creation Guide](./iso/README.md) -2. Ensure the ISO is reachable via a public or internally accessible HTTP/HTTPS URL. +### Host Your Custom ISO (Recommended for Automation) -3. Save this URL — it will be required in the next step when mounting the ISO using iDRAC Redfish Virtual Media. +After generating the ISO: +- Upload it to a web-accessible location (e.g., internal or external web server, object storage, or any HTTP/HTTPS file server). +- Ensure the ISO is reachable via a public or internally accessible HTTP/HTTPS URL. +- Save this URL, it will be required in the next step when mounting the ISO using iDRAC Redfish Virtual Media. Example: ```bash https://your-domain.com/ubuntu-22.04.5-custom.iso ``` - -If you already have a prebuilt ISO hosted and accessible via HTTP/HTTPS, you may skip this step and proceed to Step 1: Mount Ubuntu ISO. - -For detailed instructions on building a custom ISO, refer to: [ISO Creation Guide](./iso/README.md) - --- ## 1. Mount Ubuntu ISO (iDRAC Redfish) @@ -48,7 +42,7 @@ This script mounts or unmounts the **Ubuntu 22.04.5 live server ISO** using the - Mount ISO - Idempotent (skips if already mounted) -**Required Environment Variables** +### Required Environment Variables ```bash export IDRAC_IP=100.67.x.x export IDRAC_USER=root @@ -67,37 +61,37 @@ You may also use any internally hosted ISO that is reachable by iDRAC. > Note: If ISO_URL is not provided, the script will automatically use the default Ubuntu 22.04 Live Server ISO. 
it will Launch the standard installer and Prompt for manual user input during OS installation. -**Mount ISO** +### Mount ISO ```bash chmod +x mount-iso.sh ./mount-iso.sh ``` --- -## 2.Boot Ubuntu Installer (Terraform + Redfish) +## 2. Boot Ubuntu Installer (Terraform + Redfish) **Script:** [iac/main.tf](./main.tf) Terraform uses the **Dell Redfish provider** to configure a **one-time boot from Virtual Media (CD)** and **force a reboot**. -**Terraform Installation (Client Machine)** +### Terraform Installation (Client Machine) Terraform is executed from a client machine (such as your laptop or a jump host), not from the target server or iDRAC. -Install Terraform on the machine where you will run the Terraform commands. +Install Terraform on the machine where you will run the Terraform , if terraform is not already installed. -**Download Terraform:** +- **Download Terraform:** https://developer.hashicorp.com/terraform/install Choose the package for your operating system and follow the installation instructions. -**Verify Installation** +- **Verify Installation** ```bash terraform version ``` Terraform should return a version without errors. If Terraform is not found, ensure the installation directory is added to your system PATH. -**Terraform Variables** +### Terraform Variables The following variables must be explicitly provided in 'terraform.tfvars' for the Ubuntu installer boot workflow to function correctly. @@ -114,7 +108,7 @@ ubuntu_username = "user" ubuntu_password = "password" ``` -**Apply Terraform** +### Apply Terraform ```bash terraform init terraform apply @@ -133,12 +127,12 @@ Once OS is installed, Download the deploy-enterprise-inference.sh script to your This script performs **all post-OS configuration** and deploys the **Enterprise Inference stack** on a **single node**. 
-**Change permission to your file** +### Change permission to your file ```bash chmod +x deploy-enterprise-inference.sh ``` -**Required Parameters to run the script** +### Run the script ```bash sudo ./deploy-enterprise-inference.sh \ @@ -149,7 +143,7 @@ sudo ./deploy-enterprise-inference.sh \ -a cluster-url \ -m "1" \ ``` -### Options & Defaults +**Options & Defaults** | Option | Required | Default | Description | |--------|----------|----------|-------------| @@ -192,10 +186,7 @@ sudo ./deploy-enterprise-inference.sh -u user uninstall **State is tracked in:** -Deployment progress is tracked using a local state file: -```bash -/tmp/ei-deploy.state -``` +Deployment progress is tracked using a local state file: `/tmp/ei-deploy.state` **What the Deployment Script Does** @@ -215,7 +206,7 @@ Deployment progress is tracked using a local state file: After a successful deployment, verify the system at three levels: OS, Enterprise Inference services, and model inference. -**1. OS & System Validation** +### 1. OS & System Validation Verify the node is healthy and running the expected kernel. ```bash hostname @@ -233,7 +224,7 @@ df -h free -h ``` -**2. Enterprise Inference Services** +### 2. Enterprise Inference Services Verify all inference services are running. ```bash kubectl get pods -A @@ -247,7 +238,7 @@ Check systemd services manually if needed: systemctl list-units --type=service | grep -i inference ``` -**3. Gaudi3 Verification (Only if -g gaudi3)** +### 3. Gaudi3 Verification (Only if -g gaudi3) Confirm Gaudi devices and firmware are detected. ```bash hl-smi @@ -261,7 +252,7 @@ Verify kernel modules: lsmod | grep habanalabs ``` -**4. API & Networking Validation** +### 4. API & Networking Validation Verify hostname resolution: ```bash cat /etc/hosts | grep api.example.com @@ -279,7 +270,7 @@ Expected: - key.pem -**5. API Health Check** +### 5. API Health Check Validate the inference gateway is reachable. 
```bash curl -k https://api.example.com/health @@ -289,7 +280,7 @@ Expected: --- -**6. Test Model Inference** +### 6. Test Model Inference if EI is deployed with apisix, follow [Testing EI model with apisix](../EI/single-node/user-guide-apisix.md#5-test-the-inference) for generating token and testing the inference diff --git a/third_party/Dell/ubuntu-22.04/iac/iso/README.md b/third_party/Dell/ubuntu-22.04/iac/iso/README.md index 1d0a196d..7e148f90 100644 --- a/third_party/Dell/ubuntu-22.04/iac/iso/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/iso/README.md @@ -37,15 +37,15 @@ brew install xorriso rsync ## Usage -Set `--hostname`, `--username`, and `--password-hash` variables, these are need to run script. All other values have defaults. +The script requires `--hostname`, `--username`, and `--password-hash`. All other values have defaults. Basic example using a local ISO: ```bash ./custom-iso.sh \ - --hostname= \ - --username= \ - --password-hash= \ + --hostname=ubuntu-server-001 \ + --username=user \ + --iso=./ubuntu-22.04.5-live-server-amd64.iso ``` Full example with all options: @@ -82,7 +82,7 @@ Notes: |---|---|---|---| | `--hostname` | Yes | (none) | Hostname for the installed system. | | `--username` | Yes | (none) | Primary user name. | -| `--password-hash` | Yes | (none) | password should be in SHA-512 hash format. | +| `--password-hash` | Yes | (none) | SHA-512 password hash for the user. | | `--ssh-key` | No | `""` | SSH public key (optional). | | `--timezone` | No | `America/Chicago` | System timezone. | | `--locale` | No | `en_US.UTF-8` | Locale. | @@ -107,4 +107,4 @@ If testing in UTM and you see a `Shell>` prompt: - On Apple Silicon, you must use **Emulate → x86_64** for this amd64 ISO. - In the UEFI shell, type `exit`, then select the CD/DVD boot entry. -If the installer logs repeatedly mention networking updates, verify UTM’s network mode is set to **Shared** or **Bridged**. 
+If the installer logs repeatedly mention networking updates, verify UTM’s network mode is set to **Shared** or **Bridged**. \ No newline at end of file From 4e5fe2215b513b503d86e14e5ef84081f86643e9 Mon Sep 17 00:00:00 2001 From: Harika Date: Tue, 17 Feb 2026 09:48:52 -0600 Subject: [PATCH 30/35] update keycloak values with defaults Signed-off-by: Harika --- .../Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh b/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh index 40e52d7e..b471aa47 100644 --- a/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh +++ b/third_party/Dell/ubuntu-22.04/iac/deploy-enterprise-inference.sh @@ -38,9 +38,9 @@ GPU_TYPE="Enter gaudi3/cpu based on your deployment" MODELS="" DEPLOYMENT_MODE="keycloak" DEPLOY_OBSERVABILITY="off" -KEYCLOAK_CLIENT_ID="api" -KEYCLOAK_ADMIN_USER="api-admin" -KEYCLOAK_ADMIN_PASSWORD="changeme!!" 
+KEYCLOAK_CLIENT_ID="my-client-id" +KEYCLOAK_ADMIN_USER="your-keycloak-admin-user" +KEYCLOAK_ADMIN_PASSWORD="changeme" FIRMWARE_VERSION="1.22.1" STATE_FILE="/tmp/ei-deploy.state" BRANCH="release-1.4.0" From e481c1fb699485bde96679780f6b8bcb7f4f71ee Mon Sep 17 00:00:00 2001 From: Harika Date: Tue, 17 Feb 2026 12:27:44 -0600 Subject: [PATCH 31/35] adding code scan file Signed-off-by: Harika --- .github/workflows/code-scans.yaml | 167 ++++++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 .github/workflows/code-scans.yaml diff --git a/.github/workflows/code-scans.yaml b/.github/workflows/code-scans.yaml new file mode 100644 index 00000000..3d37937e --- /dev/null +++ b/.github/workflows/code-scans.yaml @@ -0,0 +1,167 @@ +name: SDLE Scans + +on: + workflow_dispatch: + inputs: + PR_number: + description: 'Pull request number' + required: true + push: + branches: [ main ] + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + +concurrency: + group: sdle-${{ github.event.inputs.PR_number || github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +permissions: + contents: read + actions: read + +jobs: + +# ----------------------------- +# 1) Trivy Scan +# ----------------------------- + trivy_scan: + name: Trivy Vulnerability Scan + runs-on: self-hosted + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.inputs.PR_number && format('refs/pull/{0}/merge', github.event.inputs.PR_number) || '' }} + + - name: Create report directory + run: mkdir -p trivy-reports + + - name: Run Trivy FS Scan + uses: aquasecurity/trivy-action@0.28.0 + continue-on-error: true + with: + scan-type: 'fs' + scan-ref: '.' 
+ scanners: 'vuln,misconfig,secret' + severity: 'CRITICAL,HIGH' + format: 'table' + output: 'trivy-reports/trivy_scan_report.txt' + + - name: Run Trivy Image Scan - vllm-cpu + uses: aquasecurity/trivy-action@0.28.0 + continue-on-error: true + with: + scan-type: 'image' + image-ref: 'public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.2' + severity: 'HIGH,CRITICAL' + format: 'table' + output: 'trivy-reports/trivy-vllm-cpu.txt' + + - name: Upload Trivy Reports + if: always() + uses: actions/upload-artifact@v4 + with: + name: trivy-reports + path: trivy-reports/ + + - name: Show Trivy FS Report in Logs + if: always() + run: | + echo "========= TRIVY FS SCAN FINDINGS =========" + cat trivy-reports/trivy_scan_report.txt || echo "No FS scan report found" + echo "==========================================" + +# ----------------------------- +# 2) Bandit Scan +# ----------------------------- + bandit_scan: + name: Bandit security scan + runs-on: self-hosted + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.event.inputs.PR_number && format('refs/pull/{0}/merge', github.event.inputs.PR_number) || '' }} + submodules: 'recursive' + fetch-depth: 0 + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + - name: Install Bandit + run: pip install bandit + - name: Create Bandit configuration + run: | + cat > .bandit << 'EOF' + [bandit] + exclude_dirs = tests,test,venv,.venv,node_modules + skips = B101 + EOF + shell: bash + - name: Run Bandit scan + run: | + bandit -r . -ll -iii -f screen + bandit -r . 
-ll -iii -f html -o bandit-report.html + - name: Upload Bandit Report + uses: actions/upload-artifact@v4 + with: + name: bandit-report + path: bandit-report.html + retention-days: 30 +# ----------------------------- +# 3) ShellCheck Scan +# ----------------------------- + shellcheck_scan: + name: ShellCheck script analysis + runs-on: self-hosted + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.inputs.PR_number && format('refs/pull/{0}/merge', github.event.inputs.PR_number) || '' }} + + - name: Create report directory + run: mkdir -p shellcheck-reports + + - name: Install ShellCheck + run: | + # Check if shellcheck is already installed + if ! command -v shellcheck &> /dev/null; then + wget -qO- "https://github.com/koalaman/shellcheck/releases/download/stable/shellcheck-stable.linux.x86_64.tar.xz" | tar -xJv + sudo cp shellcheck-stable/shellcheck /usr/local/bin/ + rm -rf shellcheck-stable + fi + shellcheck --version + + - name: Find shell scripts + id: find_scripts + run: | + SCRIPT_COUNT=$(find . -type f -name "*.sh" ! -path "./.git/*" | wc -l) + echo "Shell scripts found: $SCRIPT_COUNT" + echo "script_count=$SCRIPT_COUNT" >> $GITHUB_OUTPUT + + - name: Run ShellCheck + if: steps.find_scripts.outputs.script_count > 0 + continue-on-error: true + run: | + echo "ShellCheck Analysis Report" > shellcheck-reports/shellcheck-report.txt + echo "==========================" >> shellcheck-reports/shellcheck-report.txt + echo "" >> shellcheck-reports/shellcheck-report.txt + + find . -type f -name "*.sh" ! 
-path "./.git/*" | while read -r script; do + echo "Checking: $script" >> shellcheck-reports/shellcheck-report.txt + shellcheck -f gcc "$script" >> shellcheck-reports/shellcheck-report.txt 2>&1 || true + echo "" >> shellcheck-reports/shellcheck-report.txt + done + + cat shellcheck-reports/shellcheck-report.txt + + - name: Create empty report if no scripts + if: steps.find_scripts.outputs.script_count == 0 + run: | + echo "ShellCheck Analysis Report" > shellcheck-reports/shellcheck-report.txt + echo "No shell scripts found to analyze." >> shellcheck-reports/shellcheck-report.txt + + - name: Upload ShellCheck Report + if: always() + uses: actions/upload-artifact@v4 + with: + name: shellcheck-report + path: shellcheck-reports/shellcheck-report.txt \ No newline at end of file From ce2f3740c9690a77109829478a3ea12b038b0eb5 Mon Sep 17 00:00:00 2001 From: AhmedSeemalK <100253923+AhmedSeemalK@users.noreply.github.com> Date: Fri, 13 Feb 2026 12:26:58 +0530 Subject: [PATCH 32/35] Release v1.5.0 (#62) Signed-off-by: amberjain1 Signed-off-by: psurabh Signed-off-by: mdfaheem-intel Signed-off-by: vivekrsintc Signed-off-by: Github Actions Co-authored-by: Github Actions Co-authored-by: vhpintel Co-authored-by: sgurunat Co-authored-by: jaswanth8888 Co-authored-by: sandeshk-intel Co-authored-by: vinayK34 Signed-off-by: Harika --- core/helm-charts/apisix-helm/Chart.yaml | 2 +- .../apisix-helm/openshift-values.yaml | 14 +- core/helm-charts/apisix-helm/values.yaml | 14 +- core/helm-charts/ceph/operator-values.yaml | 2 +- core/helm-charts/fluentbit/fluebit-values.yml | 2 +- .../fluentbit/fluentbit-config.yml | 2 +- .../charts/langfuse/Chart.yaml | 2 +- .../charts/langfuse/values.lint.yaml | 2 +- .../charts/langfuse/values.yaml | 34 +- core/helm-charts/genai-gateway/Chart.yaml | 2 +- .../genai-gateway/templates/configmap.yaml | 2 +- .../genai-gateway/templates/deployment.yaml | 2 +- .../genai-gateway/templates/ingress.yaml | 2 +- .../genai-gateway/templates/ingress_eks.yaml | 7 +- 
.../genai-gateway/templates/job-rbac.yaml | 2 +- .../templates/postgres-backup-cronjob.yaml | 2 +- .../templates/postgres-backup-pvc.yaml | 2 +- .../templates/postgres-restore-job.yaml | 2 +- .../genai-gateway/templates/route.yaml | 2 +- .../genai-gateway/templates/service.yaml | 2 +- core/helm-charts/genai-gateway/values.yaml | 14 +- core/helm-charts/istio/peer-auth-ingress.yaml | 2 +- .../istio/peer-authentication.yaml | 2 +- core/helm-charts/keycloak/Chart.yaml | 2 +- .../keycloak/templates/apisixTls.yaml | 2 +- .../keycloak/templates/apisixroute.yaml | 2 +- .../keycloak/templates/ingress.yaml | 2 +- .../keycloak/templates/ingress_eks.yaml | 8 +- .../helm-charts/keycloak/templates/route.yaml | 2 +- core/helm-charts/keycloak/values.yaml | 2 +- .../mcp-server-template/Chart.yaml | 9 + .../templates/_helpers.tpl | 60 + .../templates/apisixroute.yaml | 65 + .../templates/deployment.yaml | 102 + .../mcp-server-template/templates/hpa.yaml | 33 + .../templates/ingress.yaml | 46 + .../templates/networkpolicy.yaml | 23 + .../templates/poddisruptionbudget.yaml | 19 + .../mcp-server-template/templates/secret.yaml | 20 + .../templates/service.yaml | 20 + .../templates/serviceaccount.yaml | 13 + .../mcp-server-template/values.yaml | 206 ++ .../habana-exporter/habana-metrics.yml | 2 +- .../observability/logs-stack/Chart.yaml | 2 +- .../logs-stack/aws-s3-values.yaml | 2 +- .../templates/grafana-datasources-loki.yaml | 2 +- .../observability/logs-stack/values.yaml | 2 +- core/helm-charts/ovms/Chart.yaml | 2 +- core/helm-charts/ovms/templates/_helpers.tpl | 2 +- .../ovms/templates/apisixroute.yaml | 2 +- .../ovms/templates/deployment.yaml | 2 +- core/helm-charts/ovms/templates/ingress.yaml | 2 +- core/helm-charts/ovms/templates/pvc.yaml | 50 +- core/helm-charts/ovms/templates/secret.yaml | 2 +- core/helm-charts/ovms/templates/service.yaml | 2 +- core/helm-charts/ovms/values.yaml | 2 +- core/helm-charts/tei/Chart.yaml | 2 +- core/helm-charts/tei/ci-gaudi-values.yaml | 2 +- 
core/helm-charts/tei/ci-values.yaml | 2 +- core/helm-charts/tei/gaudi-values.yaml | 11 +- core/helm-charts/tei/ingress_eks.yaml | 2 +- .../tei/templates/apisixroutes.yaml | 2 +- core/helm-charts/tei/templates/configmap.yaml | 2 +- .../helm-charts/tei/templates/deployment.yaml | 2 +- core/helm-charts/tei/templates/ingress.yaml | 2 +- .../tei/templates/ingress_eks.yaml | 14 +- core/helm-charts/tei/templates/pvc.yaml | 2 +- core/helm-charts/tei/templates/route.yaml | 2 +- core/helm-charts/tei/templates/secret.yaml | 2 +- core/helm-charts/tei/templates/service.yaml | 2 +- .../tei/templates/servicemonitor.yaml | 2 +- core/helm-charts/tei/values.yaml | 14 +- core/helm-charts/teirerank/Chart.yaml | 2 +- .../teirerank/ci-gaudi-values.yaml | 2 +- core/helm-charts/teirerank/ci-values.yaml | 2 +- core/helm-charts/teirerank/gaudi-values.yaml | 10 +- .../teirerank/templates/apisixroutes.yaml | 2 +- .../teirerank/templates/configmap.yaml | 2 +- .../teirerank/templates/deployment.yaml | 2 +- .../teirerank/templates/ingress.yaml | 2 +- .../teirerank/templates/ingress_eks.yaml | 14 +- core/helm-charts/teirerank/templates/pvc.yaml | 2 +- .../teirerank/templates/route.yaml | 2 +- .../teirerank/templates/secret.yaml | 2 +- .../teirerank/templates/service.yaml | 2 +- .../teirerank/templates/servicemonitor.yaml | 2 +- core/helm-charts/teirerank/values.yaml | 14 +- core/helm-charts/tgi/Chart.yaml | 2 +- core/helm-charts/tgi/ci-gaudi-values.yaml | 2 +- core/helm-charts/tgi/ci-values.yaml | 2 +- core/helm-charts/tgi/gaudi-values.yaml | 10 +- .../tgi/templates/apisixroutes.yaml | 2 +- core/helm-charts/tgi/templates/configmap.yaml | 2 +- .../helm-charts/tgi/templates/deployment.yaml | 2 +- core/helm-charts/tgi/templates/ingress.yaml | 2 +- .../tgi/templates/ingress_eks.yaml | 14 +- core/helm-charts/tgi/templates/pvc.yaml | 2 +- core/helm-charts/tgi/templates/route.yaml | 2 +- core/helm-charts/tgi/templates/secret.yaml | 2 +- core/helm-charts/tgi/templates/service.yaml | 2 +- 
.../tgi/templates/servicemonitor.yaml | 2 +- core/helm-charts/tgi/values.yaml | 14 +- core/helm-charts/vllm/Chart.yaml | 2 +- core/helm-charts/vllm/ci-gaudi-values.yaml | 2 +- core/helm-charts/vllm/ci-values.yaml | 2 +- core/helm-charts/vllm/gaudi-values.yaml | 10 +- core/helm-charts/vllm/gaudi3-values.yaml | 10 +- .../vllm/templates/apisixroutes.yaml | 4 +- .../helm-charts/vllm/templates/configmap.yaml | 2 +- .../vllm/templates/deployment.yaml | 7 +- core/helm-charts/vllm/templates/ingress.yaml | 2 +- .../vllm/templates/ingress_eks.yaml | 15 +- core/helm-charts/vllm/templates/pvc.yaml | 2 +- core/helm-charts/vllm/templates/route.yaml | 2 +- core/helm-charts/vllm/templates/secret.yaml | 2 +- core/helm-charts/vllm/templates/service.yaml | 2 +- .../vllm/templates/servicemonitor.yaml | 2 +- core/helm-charts/vllm/values.yaml | 14 +- core/helm-charts/vllm/xeon-values.yaml | 17 +- core/inference-stack-deploy.sh | 2 +- core/inventory/inference-config.cfg | 5 +- core/inventory/metadata/addons.yml | 2 +- core/inventory/metadata/all.yml | 2 +- .../metadata/vars/inference_common.yml | 4 +- .../metadata/vars/inference_delegate.yml | 2 +- .../metadata/vars/inference_genai_gateway.yml | 2 +- .../vars/inference_keycloak_apisix.yml | 2 +- .../metadata/vars/inference_llm_models.yml | 9 +- .../metadata/vars/inference_observability.yml | 2 +- core/lib/add-node.sh | 2 +- .../lib/cluster/config/cluster-config-init.sh | 2 +- core/lib/cluster/config/label-nodes.sh | 2 +- .../config/setup-user-cluster-config.sh | 2 +- core/lib/cluster/deployment/cluster-purge.sh | 2 +- core/lib/cluster/deployment/cluster-update.sh | 2 +- core/lib/cluster/deployment/fresh-install.sh | 26 +- core/lib/cluster/drv-fw-update.sh | 2 +- core/lib/cluster/nodes/add-node.sh | 2 +- core/lib/cluster/nodes/remove-node.sh | 2 +- core/lib/cluster/state/cluster-state-check.sh | 2 +- .../components/genai-gateway-controller.sh | 2 +- core/lib/components/ingress-controller.sh | 2 +- 
core/lib/components/intel-base-operator.sh | 2 +- core/lib/components/keycloak-controller.sh | 2 +- core/lib/components/kubernetes-setup.sh | 2 +- .../components/observability-controller.sh | 2 +- .../components/service-mesh/install-istio.sh | 2 +- .../storage/install-ceph-cluster.sh | 2 +- .../storage/uninstall-ceph-cluster.sh | 2 +- core/lib/models/install-model-hf.sh | 2 +- core/lib/models/install-model.sh | 22 +- core/lib/models/list-model.sh | 2 +- core/lib/models/model-selection.sh | 2 +- core/lib/models/uninstall-model-hf.sh | 2 +- core/lib/models/uninstall-model.sh | 2 +- core/lib/system/config-vars.sh | 2 +- core/lib/system/execute-and-check.sh | 2 +- core/lib/system/precheck/prereq-check.sh | 2 +- core/lib/system/precheck/read-config-file.sh | 2 +- core/lib/system/precheck/readiness-check.sh | 2 +- core/lib/system/setup-env.sh | 6 +- core/lib/user-menu/parse-user-prompts.sh | 2 +- core/lib/user-menu/user-menu.sh | 2 +- core/lib/xeon/ballon-policy.sh | 2 +- core/playbooks/deploy-ceph-storage.yml | 2 +- core/playbooks/deploy-cluster-config.yml | 2 +- core/playbooks/deploy-cpu-optimization.yml | 2 +- .../deploy-gaudi-firmware-driver.yml | 2 +- core/playbooks/deploy-genai-gateway.yml | 5 +- core/playbooks/deploy-habana-ai-operator.yml | 2 +- core/playbooks/deploy-inference-models.yml | 134 +- core/playbooks/deploy-ingress-controller.yml | 15 +- core/playbooks/deploy-istio-openshift.yml | 2 +- core/playbooks/deploy-istio.yml | 2 +- core/playbooks/deploy-keycloak-controller.yml | 2 +- core/playbooks/deploy-keycloak-service.yml | 2 +- core/playbooks/deploy-keycloak-tls-cert.yml | 61 +- .../deploy-observability-openshift.yml | 2 +- core/playbooks/deploy-observability.yml | 6 +- core/playbooks/generate-ceph-values.yml | 2 +- core/playbooks/inference-precheck.yml | 2 +- core/playbooks/label-nodes.yml | 14 +- .../register-model-genai-gateway.yml | 2 +- core/playbooks/reset.yml | 2 +- core/playbooks/setup-bastion.yml | 1204 ++++++++++ 
core/playbooks/setup-user-kubeconfig.yml | 2 +- core/roles/inference-precheck/tasks/main.yml | 6 +- core/roles/inference-tools/meta/main.yml | 2 +- core/roles/inference-tools/tasks/main.yml | 2 +- core/roles/kubernetes-precheck/tasks/main.yml | 2 +- .../roles/nri_cpu_balloons/defaults/main.yaml | 2 +- .../nri_cpu_balloons/tasks/install_nri.yaml | 2 +- .../tasks/install_nri_openshift.yaml | 2 +- core/roles/nri_cpu_balloons/tasks/main.yaml | 208 +- .../templates/generic-balloon-values.yaml.j2 | 42 +- .../utils/files/compute_reserved_cpus.sh | 71 + .../tasks/get_optimized_cpu_topology.yaml | 84 +- core/scripts/gaudi-firmware-driver-updater.sh | 2 +- core/scripts/generate-vault-secrets.sh | 4 + core/scripts/keycloak-fetch-client-secret.sh | 2 +- core/scripts/keycloak-realmcreation.sh | 2 +- core/scripts/vllm-quickstart/README.md | 194 ++ core/scripts/vllm-quickstart/models.json | 58 + .../vllm-quickstart/vllm-model-runner.sh | 833 +++++++ docs/README.md | 1 + docs/cpu-optimization-guide.md | 33 +- docs/examples/multi-node/inference-config.cfg | 5 +- .../single-node/einf-singlenode-gaudi.yml | 2 +- .../single-node/einf-singlenode-xeon.yml | 2 +- .../examples/single-node/inference-config.cfg | 3 + docs/getting-started-deploy-MCP.md | 468 ++++ docs/prerequisites.md | 2 +- docs/single-node-deployment.md | 6 + .../agenticai/docs/agenticai-quickstart.md | 272 +++ .../playbooks/deploy-agenticai-plugin.yml | 222 ++ .../agenticai/templates/software-team.json | 2081 +++++++++++++++++ .../agenticai/vars/agenticai-plugin-vars.yml | 72 + 217 files changed, 6774 insertions(+), 649 deletions(-) create mode 100644 core/helm-charts/mcp-server-template/Chart.yaml create mode 100644 core/helm-charts/mcp-server-template/templates/_helpers.tpl create mode 100644 core/helm-charts/mcp-server-template/templates/apisixroute.yaml create mode 100644 core/helm-charts/mcp-server-template/templates/deployment.yaml create mode 100644 core/helm-charts/mcp-server-template/templates/hpa.yaml create 
mode 100644 core/helm-charts/mcp-server-template/templates/ingress.yaml create mode 100644 core/helm-charts/mcp-server-template/templates/networkpolicy.yaml create mode 100644 core/helm-charts/mcp-server-template/templates/poddisruptionbudget.yaml create mode 100644 core/helm-charts/mcp-server-template/templates/secret.yaml create mode 100644 core/helm-charts/mcp-server-template/templates/service.yaml create mode 100644 core/helm-charts/mcp-server-template/templates/serviceaccount.yaml create mode 100644 core/helm-charts/mcp-server-template/values.yaml create mode 100644 core/roles/utils/files/compute_reserved_cpus.sh create mode 100644 core/scripts/vllm-quickstart/README.md create mode 100644 core/scripts/vllm-quickstart/models.json create mode 100755 core/scripts/vllm-quickstart/vllm-model-runner.sh create mode 100644 docs/getting-started-deploy-MCP.md create mode 100644 plugins/agenticai/docs/agenticai-quickstart.md create mode 100644 plugins/agenticai/playbooks/deploy-agenticai-plugin.yml create mode 100644 plugins/agenticai/templates/software-team.json create mode 100644 plugins/agenticai/vars/agenticai-plugin-vars.yml diff --git a/core/helm-charts/apisix-helm/Chart.yaml b/core/helm-charts/apisix-helm/Chart.yaml index 3a1aeb0c..d47841f4 100644 --- a/core/helm-charts/apisix-helm/Chart.yaml +++ b/core/helm-charts/apisix-helm/Chart.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v2 name: auth-apisix diff --git a/core/helm-charts/apisix-helm/openshift-values.yaml b/core/helm-charts/apisix-helm/openshift-values.yaml index 1fb2d339..c7b2ea63 100644 --- a/core/helm-charts/apisix-helm/openshift-values.yaml +++ b/core/helm-charts/apisix-helm/openshift-values.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # APISIX Namespace @@ -33,9 +33,9 @@ apisix: 
requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-infra-eligible operator: In - values: ["infra"] + values: ["true"] - matchExpressions: - key: node-role.kubernetes.io/control-plane operator: Exists @@ -76,9 +76,9 @@ apisix: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-infra-eligible operator: In - values: ["infra"] + values: ["true"] - matchExpressions: - key: node-role.kubernetes.io/control-plane operator: Exists @@ -105,9 +105,9 @@ apisix: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-infra-eligible operator: In - values: ["infra"] + values: ["true"] - matchExpressions: - key: node-role.kubernetes.io/control-plane operator: Exists diff --git a/core/helm-charts/apisix-helm/values.yaml b/core/helm-charts/apisix-helm/values.yaml index 46b68806..871b9a7e 100644 --- a/core/helm-charts/apisix-helm/values.yaml +++ b/core/helm-charts/apisix-helm/values.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # APISIX Namespace @@ -28,9 +28,9 @@ apisix: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-infra-eligible operator: In - values: ["infra"] + values: ["true"] - matchExpressions: - key: node-role.kubernetes.io/control-plane operator: Exists @@ -64,9 +64,9 @@ apisix: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-infra-eligible operator: In - values: ["infra"] + values: ["true"] - matchExpressions: - key: node-role.kubernetes.io/control-plane operator: Exists @@ -93,9 +93,9 @@ apisix: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-infra-eligible operator: In - values: ["infra"] + values: 
["true"] - matchExpressions: - key: node-role.kubernetes.io/control-plane operator: Exists diff --git a/core/helm-charts/ceph/operator-values.yaml b/core/helm-charts/ceph/operator-values.yaml index 75f57b68..048afb09 100644 --- a/core/helm-charts/ceph/operator-values.yaml +++ b/core/helm-charts/ceph/operator-values.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 image: repository: docker.io/rook/ceph diff --git a/core/helm-charts/fluentbit/fluebit-values.yml b/core/helm-charts/fluentbit/fluebit-values.yml index e61eea8b..66179eda 100644 --- a/core/helm-charts/fluentbit/fluebit-values.yml +++ b/core/helm-charts/fluentbit/fluebit-values.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 fluent-bit: diff --git a/core/helm-charts/fluentbit/fluentbit-config.yml b/core/helm-charts/fluentbit/fluentbit-config.yml index b73c13b3..71963f7f 100644 --- a/core/helm-charts/fluentbit/fluentbit-config.yml +++ b/core/helm-charts/fluentbit/fluentbit-config.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 data: diff --git a/core/helm-charts/genai-gateway-trace/charts/langfuse/Chart.yaml b/core/helm-charts/genai-gateway-trace/charts/langfuse/Chart.yaml index f0958b17..1430c0d3 100644 --- a/core/helm-charts/genai-gateway-trace/charts/langfuse/Chart.yaml +++ b/core/helm-charts/genai-gateway-trace/charts/langfuse/Chart.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v2 name: langfuse diff --git a/core/helm-charts/genai-gateway-trace/charts/langfuse/values.lint.yaml b/core/helm-charts/genai-gateway-trace/charts/langfuse/values.lint.yaml index e1ede1ac..8bd8b62f 100644 --- 
a/core/helm-charts/genai-gateway-trace/charts/langfuse/values.lint.yaml +++ b/core/helm-charts/genai-gateway-trace/charts/langfuse/values.lint.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # This file is used to validate the Chart and sets some values for required fields. It is not used for the deployment. # Example usage: `helm lint . --values values.lint.yaml` diff --git a/core/helm-charts/genai-gateway-trace/charts/langfuse/values.yaml b/core/helm-charts/genai-gateway-trace/charts/langfuse/values.yaml index ea4b2ed1..b45662f4 100644 --- a/core/helm-charts/genai-gateway-trace/charts/langfuse/values.yaml +++ b/core/helm-charts/genai-gateway-trace/charts/langfuse/values.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Langfuse Helm Chart Configuration @@ -148,9 +148,9 @@ langfuse: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-infra-eligible operator: In - values: ["infra"] + values: ["true"] - matchExpressions: - key: node-role.kubernetes.io/control-plane operator: Exists @@ -509,9 +509,9 @@ postgresql: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-infra-eligible operator: In - values: ["infra"] + values: ["true"] - matchExpressions: - key: node-role.kubernetes.io/control-plane operator: Exists @@ -536,9 +536,9 @@ postgresql: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-infra-eligible operator: In - values: ["infra"] + values: ["true"] - matchExpressions: - key: node-role.kubernetes.io/control-plane operator: Exists @@ -596,9 +596,9 @@ redis: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: 
ei-infra-eligible operator: In - values: ["infra"] + values: ["true"] - matchExpressions: - key: node-role.kubernetes.io/control-plane operator: Exists @@ -625,9 +625,9 @@ redis: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-infra-eligible operator: In - values: ["infra"] + values: ["true"] - matchExpressions: - key: node-role.kubernetes.io/control-plane operator: Exists @@ -660,9 +660,9 @@ clickhouse: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-infra-eligible operator: In - values: ["infra"] + values: ["true"] - matchExpressions: - key: node-role.kubernetes.io/control-plane operator: Exists @@ -690,9 +690,9 @@ clickhouse: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-infra-eligible operator: In - values: ["infra"] + values: ["true"] - matchExpressions: - key: node-role.kubernetes.io/control-plane operator: Exists @@ -887,9 +887,9 @@ s3: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-infra-eligible operator: In - values: ["infra"] + values: ["true"] - matchExpressions: - key: node-role.kubernetes.io/control-plane operator: Exists diff --git a/core/helm-charts/genai-gateway/Chart.yaml b/core/helm-charts/genai-gateway/Chart.yaml index da04ef9b..7909afe5 100644 --- a/core/helm-charts/genai-gateway/Chart.yaml +++ b/core/helm-charts/genai-gateway/Chart.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v2 name: genaigateway diff --git a/core/helm-charts/genai-gateway/templates/configmap.yaml b/core/helm-charts/genai-gateway/templates/configmap.yaml index ffb8aeac..4a1330b7 100644 --- a/core/helm-charts/genai-gateway/templates/configmap.yaml +++ 
b/core/helm-charts/genai-gateway/templates/configmap.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 kind: ConfigMap diff --git a/core/helm-charts/genai-gateway/templates/deployment.yaml b/core/helm-charts/genai-gateway/templates/deployment.yaml index 310279e5..c2259045 100644 --- a/core/helm-charts/genai-gateway/templates/deployment.yaml +++ b/core/helm-charts/genai-gateway/templates/deployment.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: apps/v1 kind: Deployment diff --git a/core/helm-charts/genai-gateway/templates/ingress.yaml b/core/helm-charts/genai-gateway/templates/ingress.yaml index f45b847d..67980d4a 100644 --- a/core/helm-charts/genai-gateway/templates/ingress.yaml +++ b/core/helm-charts/genai-gateway/templates/ingress.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.ingress.enabled }} apiVersion: networking.k8s.io/v1 diff --git a/core/helm-charts/genai-gateway/templates/ingress_eks.yaml b/core/helm-charts/genai-gateway/templates/ingress_eks.yaml index 6d61a174..367a5396 100644 --- a/core/helm-charts/genai-gateway/templates/ingress_eks.yaml +++ b/core/helm-charts/genai-gateway/templates/ingress_eks.yaml @@ -5,12 +5,15 @@ metadata: annotations: alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_301"}}' - alb.ingress.kubernetes.io/certificate-arn: "{{ default "" .Values.aws_certificate_arn }}" - alb.ingress.kubernetes.io/group.name: eks-genai + {{- if .Values.aws_certificate_arn }} + alb.ingress.kubernetes.io/certificate-arn: '{{ .Values.aws_certificate_arn }}' + {{- end }} + alb.ingress.kubernetes.io/group.name: ei-eks 
alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]' alb.ingress.kubernetes.io/reconcile: now alb.ingress.kubernetes.io/scheme: internet-facing alb.ingress.kubernetes.io/target-type: ip + alb.ingress.kubernetes.io/group.order: '3' name: genai-gateway-ingress spec: ingressClassName: alb diff --git a/core/helm-charts/genai-gateway/templates/job-rbac.yaml b/core/helm-charts/genai-gateway/templates/job-rbac.yaml index 05b0c17d..14c12197 100644 --- a/core/helm-charts/genai-gateway/templates/job-rbac.yaml +++ b/core/helm-charts/genai-gateway/templates/job-rbac.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: rbac.authorization.k8s.io/v1 kind: Role diff --git a/core/helm-charts/genai-gateway/templates/postgres-backup-cronjob.yaml b/core/helm-charts/genai-gateway/templates/postgres-backup-cronjob.yaml index 3cf6037c..61968380 100644 --- a/core/helm-charts/genai-gateway/templates/postgres-backup-cronjob.yaml +++ b/core/helm-charts/genai-gateway/templates/postgres-backup-cronjob.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.backup.enabled }} apiVersion: batch/v1 diff --git a/core/helm-charts/genai-gateway/templates/postgres-backup-pvc.yaml b/core/helm-charts/genai-gateway/templates/postgres-backup-pvc.yaml index da3b35a7..470c418c 100644 --- a/core/helm-charts/genai-gateway/templates/postgres-backup-pvc.yaml +++ b/core/helm-charts/genai-gateway/templates/postgres-backup-pvc.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.backup.enabled }} {{- if .Values.backup.pvc.enabled }} diff --git a/core/helm-charts/genai-gateway/templates/postgres-restore-job.yaml 
b/core/helm-charts/genai-gateway/templates/postgres-restore-job.yaml index 069cd789..d72e0550 100644 --- a/core/helm-charts/genai-gateway/templates/postgres-restore-job.yaml +++ b/core/helm-charts/genai-gateway/templates/postgres-restore-job.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.restoreJob.enabled }} apiVersion: batch/v1 diff --git a/core/helm-charts/genai-gateway/templates/route.yaml b/core/helm-charts/genai-gateway/templates/route.yaml index a0e8c2fe..8468287c 100644 --- a/core/helm-charts/genai-gateway/templates/route.yaml +++ b/core/helm-charts/genai-gateway/templates/route.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.route.enabled }} apiVersion: route.openshift.io/v1 diff --git a/core/helm-charts/genai-gateway/templates/service.yaml b/core/helm-charts/genai-gateway/templates/service.yaml index 8f108713..ce61ad10 100644 --- a/core/helm-charts/genai-gateway/templates/service.yaml +++ b/core/helm-charts/genai-gateway/templates/service.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 kind: Service diff --git a/core/helm-charts/genai-gateway/values.yaml b/core/helm-charts/genai-gateway/values.yaml index c18c83dc..6e38665e 100644 --- a/core/helm-charts/genai-gateway/values.yaml +++ b/core/helm-charts/genai-gateway/values.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 replicaCount: 1 image: @@ -15,9 +15,9 @@ affinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-infra-eligible operator: In - values: ["infra"] + values: ["true"] - matchExpressions: 
- key: node-role.kubernetes.io/control-plane operator: Exists @@ -113,9 +113,9 @@ postgresql: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-infra-eligible operator: In - values: ["infra"] + values: ["true"] - matchExpressions: - key: node-role.kubernetes.io/control-plane operator: Exists @@ -144,9 +144,9 @@ redis: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-infra-eligible operator: In - values: ["infra"] + values: ["true"] - matchExpressions: - key: node-role.kubernetes.io/control-plane operator: Exists diff --git a/core/helm-charts/istio/peer-auth-ingress.yaml b/core/helm-charts/istio/peer-auth-ingress.yaml index 3a6773a8..043cbd87 100644 --- a/core/helm-charts/istio/peer-auth-ingress.yaml +++ b/core/helm-charts/istio/peer-auth-ingress.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: security.istio.io/v1 kind: PeerAuthentication diff --git a/core/helm-charts/istio/peer-authentication.yaml b/core/helm-charts/istio/peer-authentication.yaml index 8fcd8d20..49370638 100644 --- a/core/helm-charts/istio/peer-authentication.yaml +++ b/core/helm-charts/istio/peer-authentication.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: security.istio.io/v1 kind: PeerAuthentication diff --git a/core/helm-charts/keycloak/Chart.yaml b/core/helm-charts/keycloak/Chart.yaml index cd66fdfe..d2bd64d6 100644 --- a/core/helm-charts/keycloak/Chart.yaml +++ b/core/helm-charts/keycloak/Chart.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v2 name: keycloak-apisix diff --git a/core/helm-charts/keycloak/templates/apisixTls.yaml 
b/core/helm-charts/keycloak/templates/apisixTls.yaml index 3f7951f4..e7c3d5bc 100644 --- a/core/helm-charts/keycloak/templates/apisixTls.yaml +++ b/core/helm-charts/keycloak/templates/apisixTls.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: apisix.apache.org/v2 kind: ApisixTls diff --git a/core/helm-charts/keycloak/templates/apisixroute.yaml b/core/helm-charts/keycloak/templates/apisixroute.yaml index 32754370..ac7a8b47 100644 --- a/core/helm-charts/keycloak/templates/apisixroute.yaml +++ b/core/helm-charts/keycloak/templates/apisixroute.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: apisix.apache.org/v2 kind: ApisixRoute diff --git a/core/helm-charts/keycloak/templates/ingress.yaml b/core/helm-charts/keycloak/templates/ingress.yaml index c0747c19..992360b0 100644 --- a/core/helm-charts/keycloak/templates/ingress.yaml +++ b/core/helm-charts/keycloak/templates/ingress.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if not .Values.apisixRoute.enabled }} apiVersion: networking.k8s.io/v1 diff --git a/core/helm-charts/keycloak/templates/ingress_eks.yaml b/core/helm-charts/keycloak/templates/ingress_eks.yaml index 0e455985..4af2999c 100644 --- a/core/helm-charts/keycloak/templates/ingress_eks.yaml +++ b/core/helm-charts/keycloak/templates/ingress_eks.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: networking.k8s.io/v1 kind: Ingress @@ -7,8 +7,10 @@ metadata: namespace: auth-apisix annotations: alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": 
"HTTP_301"}}' - alb.ingress.kubernetes.io/certificate-arn: "{{ default "" .Values.aws_certificate_arn }}" - alb.ingress.kubernetes.io/group.name: keycloak-apisix + {{- if .Values.aws_certificate_arn }} + alb.ingress.kubernetes.io/certificate-arn: '{{ .Values.aws_certificate_arn }}' + {{- end }} + alb.ingress.kubernetes.io/group.name: ei-eks alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]' alb.ingress.kubernetes.io/scheme: internet-facing alb.ingress.kubernetes.io/target-type: ip diff --git a/core/helm-charts/keycloak/templates/route.yaml b/core/helm-charts/keycloak/templates/route.yaml index 364beb4a..4f725fba 100644 --- a/core/helm-charts/keycloak/templates/route.yaml +++ b/core/helm-charts/keycloak/templates/route.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.route.enabled }} apiVersion: route.openshift.io/v1 diff --git a/core/helm-charts/keycloak/values.yaml b/core/helm-charts/keycloak/values.yaml index a85215c8..86e5639c 100644 --- a/core/helm-charts/keycloak/values.yaml +++ b/core/helm-charts/keycloak/values.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 keycloakTokenRoute: host: "" diff --git a/core/helm-charts/mcp-server-template/Chart.yaml b/core/helm-charts/mcp-server-template/Chart.yaml new file mode 100644 index 00000000..ed7221a5 --- /dev/null +++ b/core/helm-charts/mcp-server-template/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v2 +name: mcp-demo +version: 0.1.0 +description: Helm chart for MCP Demo server +appVersion: "1.0.0" +keywords: + - mcp + - kubernetes +type: application diff --git a/core/helm-charts/mcp-server-template/templates/_helpers.tpl b/core/helm-charts/mcp-server-template/templates/_helpers.tpl new file mode 100644 index 00000000..a7399f60 --- /dev/null +++ 
b/core/helm-charts/mcp-server-template/templates/_helpers.tpl @@ -0,0 +1,60 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "mcp-demo.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +*/}} +{{- define "mcp-demo.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "mcp-demo.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "mcp-demo.labels" -}} +helm.sh/chart: {{ include "mcp-demo.chart" . }} +{{ include "mcp-demo.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "mcp-demo.selectorLabels" -}} +app.kubernetes.io/name: {{ include "mcp-demo.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "mcp-demo.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "mcp-demo.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/core/helm-charts/mcp-server-template/templates/apisixroute.yaml b/core/helm-charts/mcp-server-template/templates/apisixroute.yaml new file mode 100644 index 00000000..10fe702e --- /dev/null +++ b/core/helm-charts/mcp-server-template/templates/apisixroute.yaml @@ -0,0 +1,65 @@ +{{- if .Values.apisix.enabled }} +apiVersion: apisix.apache.org/v2 +kind: ApisixRoute +metadata: + name: {{ include "mcp-demo.fullname" . }}-route + namespace: {{ .Release.Namespace }} + labels: + {{- include "mcp-demo.labels" . | nindent 4 }} +spec: + http: + # MCP endpoint - protected with OIDC + - name: mcp-http-streamable + match: + hosts: + - {{ .Values.ingress.host }} + paths: + - {{ .Values.ingress.path }}* + methods: + - GET + - POST + - PUT + - DELETE + backends: + - serviceName: {{ include "mcp-demo.fullname" . }} + servicePort: {{ .Values.service.port }} + weight: 100 + websocket: true + timeout: + connect: 60s + send: 3600s + read: 3600s + plugins: + - name: proxy-rewrite + enable: true + config: + headers: + set: + # Disable buffering for streaming responses + X-Accel-Buffering: "no" + # Maintain connection for streaming + Connection: "keep-alive" + - name: openid-connect + enable: true + secretRef: {{ include "mcp-demo.fullname" . }}-secret + config: + discovery: {{ .Values.oidc.discovery }} + introspection_endpoint: {{ .Values.oidc.introspection_endpoint }} + introspection_endpoint_auth_method: client_secret_basic + scope: openid profile email + bearer_only: true + realm: master + # Health check endpoint - no OIDC protection + - name: mcp-health + match: + hosts: + - {{ .Values.ingress.host }} + paths: + - /health + methods: + - GET + backends: + - serviceName: {{ include "mcp-demo.fullname" . 
}} + servicePort: {{ .Values.service.port }} + weight: 100 +{{- end }} diff --git a/core/helm-charts/mcp-server-template/templates/deployment.yaml b/core/helm-charts/mcp-server-template/templates/deployment.yaml new file mode 100644 index 00000000..b52f9f88 --- /dev/null +++ b/core/helm-charts/mcp-server-template/templates/deployment.yaml @@ -0,0 +1,102 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "mcp-demo.fullname" . }} + namespace: {{ .Release.Namespace | quote }} + labels: + {{- include "mcp-demo.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + strategy: + {{- toYaml .Values.deploymentStrategy | nindent 4 }} + selector: + matchLabels: + {{- include "mcp-demo.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "mcp-demo.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "mcp-demo.serviceAccountName" . }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName }} + {{- end }} + terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.image.pullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ .Chart.Name }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . 
| nindent 12 }} + {{- end }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.service.targetPort }} + protocol: TCP + {{- if .Values.livenessProbe.enabled }} + livenessProbe: + httpGet: + path: {{ .Values.livenessProbe.httpGet.path }} + port: {{ .Values.livenessProbe.httpGet.port }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + successThreshold: {{ .Values.livenessProbe.successThreshold }} + {{- end }} + {{- if .Values.readinessProbe.enabled }} + readinessProbe: + httpGet: + path: {{ .Values.readinessProbe.httpGet.path }} + port: {{ .Values.readinessProbe.httpGet.port }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- with .Values.env }} + env: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.envFrom }} + envFrom: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumeMounts }} + volumeMounts: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumes }} + volumes: + {{- toYaml . 
| nindent 8 }} + {{- end }} diff --git a/core/helm-charts/mcp-server-template/templates/hpa.yaml b/core/helm-charts/mcp-server-template/templates/hpa.yaml new file mode 100644 index 00000000..9e6c83f8 --- /dev/null +++ b/core/helm-charts/mcp-server-template/templates/hpa.yaml @@ -0,0 +1,33 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "mcp-demo.fullname" . }} + namespace: {{ .Release.Namespace | quote }} + labels: + {{- include "mcp-demo.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "mcp-demo.fullname" . }} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/core/helm-charts/mcp-server-template/templates/ingress.yaml b/core/helm-charts/mcp-server-template/templates/ingress.yaml new file mode 100644 index 00000000..2bc1a77b --- /dev/null +++ b/core/helm-charts/mcp-server-template/templates/ingress.yaml @@ -0,0 +1,46 @@ +{{- if .Values.ingress.enabled }} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "mcp-demo.fullname" . }} + namespace: {{ .Values.apisix.enabled | ternary "auth-apisix" (.Values.ingress.namespace | default .Release.Namespace) }} + labels: + {{- include "mcp-demo.labels" . 
| nindent 4 }} + annotations: + kubernetes.io/ingress.class: {{ .Values.ingress.className }} + nginx.ingress.kubernetes.io/use-regex: "true" + nginx.ingress.kubernetes.io/proxy-buffering: "off" + nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" + nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" + {{- with .Values.ingress.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + ingressClassName: {{ .Values.ingress.className }} + {{- if .Values.ingress.tls.enabled }} + tls: + - hosts: + - {{ .Values.ingress.host }} + secretName: {{ .Values.ingress.tls.secretName }} + {{- end }} + {{- $svcName := ternary "auth-apisix-gateway" (include "mcp-demo.fullname" .) .Values.apisix.enabled }} + {{- $svcPort := ternary 80 .Values.service.port .Values.apisix.enabled }} + rules: + - host: {{ .Values.ingress.host }} + http: + paths: + - backend: + service: + name: {{ $svcName }} + port: + number: {{ $svcPort }} + path: /health + pathType: Prefix + - backend: + service: + name: {{ $svcName }} + port: + number: {{ $svcPort }} + path: {{ .Values.ingress.path }} + pathType: Prefix +{{- end }} diff --git a/core/helm-charts/mcp-server-template/templates/networkpolicy.yaml b/core/helm-charts/mcp-server-template/templates/networkpolicy.yaml new file mode 100644 index 00000000..2e83caba --- /dev/null +++ b/core/helm-charts/mcp-server-template/templates/networkpolicy.yaml @@ -0,0 +1,23 @@ +{{- if .Values.networkPolicy.enabled }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "mcp-demo.fullname" . }} + namespace: {{ .Release.Namespace | quote }} + labels: + {{- include "mcp-demo.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + {{- include "mcp-demo.selectorLabels" . | nindent 6 }} + policyTypes: + {{- toYaml .Values.networkPolicy.policyTypes | nindent 4 }} + {{- with .Values.networkPolicy.ingress }} + ingress: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.networkPolicy.egress }} + egress: + {{- toYaml . 
| nindent 4 }} + {{- end }} +{{- end }} diff --git a/core/helm-charts/mcp-server-template/templates/poddisruptionbudget.yaml b/core/helm-charts/mcp-server-template/templates/poddisruptionbudget.yaml new file mode 100644 index 00000000..ab1e7d32 --- /dev/null +++ b/core/helm-charts/mcp-server-template/templates/poddisruptionbudget.yaml @@ -0,0 +1,19 @@ +{{- if .Values.podDisruptionBudget.enabled }} +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: {{ include "mcp-demo.fullname" . }} + namespace: {{ .Release.Namespace | quote }} + labels: + {{- include "mcp-demo.labels" . | nindent 4 }} +spec: + {{- if .Values.podDisruptionBudget.minAvailable }} + minAvailable: {{ .Values.podDisruptionBudget.minAvailable }} + {{- end }} + {{- if .Values.podDisruptionBudget.maxUnavailable }} + maxUnavailable: {{ .Values.podDisruptionBudget.maxUnavailable }} + {{- end }} + selector: + matchLabels: + {{- include "mcp-demo.selectorLabels" . | nindent 6 }} +{{- end }} diff --git a/core/helm-charts/mcp-server-template/templates/secret.yaml b/core/helm-charts/mcp-server-template/templates/secret.yaml new file mode 100644 index 00000000..fac0e539 --- /dev/null +++ b/core/helm-charts/mcp-server-template/templates/secret.yaml @@ -0,0 +1,20 @@ +{{- if or .Values.apisix.enabled .Values.secrets.enabled }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "mcp-demo.fullname" . }}-secret + namespace: {{ .Release.Namespace }} + labels: + {{- include "mcp-demo.labels" . 
| nindent 4 }} +type: Opaque +data: + {{- if .Values.apisix.enabled }} + client_id: {{ .Values.oidc.client_id | b64enc | quote }} + client_secret: {{ .Values.oidc.client_secret | b64enc | quote }} + {{- end }} + {{- if .Values.secrets.enabled }} + {{- range $key, $value := .Values.secrets.data }} + {{ $key }}: {{ $value | b64enc }} + {{- end }} + {{- end }} +{{- end }} diff --git a/core/helm-charts/mcp-server-template/templates/service.yaml b/core/helm-charts/mcp-server-template/templates/service.yaml new file mode 100644 index 00000000..ca400de6 --- /dev/null +++ b/core/helm-charts/mcp-server-template/templates/service.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "mcp-demo.fullname" . }} + namespace: {{ .Release.Namespace | quote }} + labels: + {{- include "mcp-demo.labels" . | nindent 4 }} + {{- with .Values.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.type }} + ports: + - name: http + port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + selector: + {{- include "mcp-demo.selectorLabels" . | nindent 4 }} diff --git a/core/helm-charts/mcp-server-template/templates/serviceaccount.yaml b/core/helm-charts/mcp-server-template/templates/serviceaccount.yaml new file mode 100644 index 00000000..658a6164 --- /dev/null +++ b/core/helm-charts/mcp-server-template/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "mcp-demo.serviceAccountName" . }} + namespace: {{ .Release.Namespace | quote }} + labels: + {{- include "mcp-demo.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +{{- end }} diff --git a/core/helm-charts/mcp-server-template/values.yaml b/core/helm-charts/mcp-server-template/values.yaml new file mode 100644 index 00000000..c843e5f0 --- /dev/null +++ b/core/helm-charts/mcp-server-template/values.yaml @@ -0,0 +1,206 @@ +# Replica count: Initial number of pod replicas +# With stateless_http=True in the MCP server, horizontal scaling is fully supported +replicaCount: 1 + +image: + repository: /my_mcp_server + tag: "1.0.0" # Use semantic versioning, never 'latest' in production + pullPolicy: Always # Always pull to ensure correct version with registry + # pullSecrets: [] + +nameOverride: "" +fullnameOverride: "" + +# ServiceAccount configuration +serviceAccount: + create: true + annotations: {} + name: "" + +# Pod annotations +podAnnotations: + prometheus.io/scrape: "false" + +# Pod security context - production hardening +podSecurityContext: + runAsNonRoot: true + runAsUser: 1000 + fsGroup: 1000 + seccompProfile: + type: RuntimeDefault + +# Container security context +securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: false # Set true if app doesn't need write access + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + +service: + type: ClusterIP + port: 8000 + targetPort: 8000 + annotations: {} + +# Ingress configuration +ingress: + enabled: true + className: "nginx" + namespace: auth-apisix + host: api.example.com # Replace with your ingress hostname + # MCP endpoint path - customize for your deployment + path: /demo/mcp + annotations: + nginx.ingress.kubernetes.io/use-regex: "true" + # Streaming-friendly settings for MCP Streamable HTTP (per MCP best practices) + nginx.ingress.kubernetes.io/proxy-buffering: "off" + nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" + nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" + nginx.ingress.kubernetes.io/proxy-http-version: "1.1" + tls: + enabled: true + secretName: mcp-server-tls # Replace with your 
TLS secret name + +# APISIX Route +apisix: + enabled: true + +# Resource limits and requests for HPA-based horizontal scaling +# Keep requests and limits close (1.5-2x) for efficient HPA scaling +# HPA scales based on requests, not limits - right-size your requests! +# Start conservative, monitor actual usage, then adjust +resources: + limits: + cpu: 500m # Allow 2x burst above request + memory: 512Mi # Allow 2x burst above request + requests: + cpu: 250m # Baseline: typical Python web server per pod + memory: 256Mi # Baseline: FastMCP + dependencies + +# Horizontal Pod Autoscaler (HPA) configuration +# Automatically scales pods based on CPU/memory utilization +# Requires metrics-server to be installed in the cluster +autoscaling: + enabled: false + minReplicas: 2 + maxReplicas: 10 + targetCPUUtilizationPercentage: 70 + targetMemoryUtilizationPercentage: 80 + +# Liveness: restarts unhealthy pods +# Readiness: removes pods from load balancer if not ready +livenessProbe: + enabled: false + httpGet: + path: /health + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + successThreshold: 1 + +readinessProbe: + enabled: false + httpGet: + path: /health + port: http + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 2 + successThreshold: 1 + +# Deployment strategy - controls how updates are rolled out +# maxSurge: maximum number of pods above desired count during updates +# maxUnavailable: maximum number of pods that can be unavailable during updates +# Current config ensures zero-downtime deployments +deploymentStrategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + +# Pod Disruption Budget - ensures high availability during voluntary disruptions +# minAvailable: minimum number of pods that must be available during disruptions +# Protects against downtime during node drains, upgrades, and maintenance +podDisruptionBudget: + enabled: true + minAvailable: 1 + # 
maxUnavailable: 1 # Alternative: maximum number of pods that can be unavailable + +# Pod affinity and anti-affinity rules +# podAntiAffinity spreads pods across different nodes for better availability +affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - mcp-server + topologyKey: kubernetes.io/hostname + +# Tolerations +tolerations: [] + +# Node selector +nodeSelector: {} + +# Environment variables +env: [] + # - name: LOG_LEVEL + # value: "info" + +# Environment from configmap/secret +envFrom: [] + # - configMapRef: + # name: app-config + # - secretRef: + # name: app-secrets + +# Volumes +volumes: [] + # - name: tmp + # emptyDir: {} + +# Volume mounts +volumeMounts: [] + # - name: tmp + # mountPath: /tmp + +# OIDC Configuration (OpenID Connect Authentication) +oidc: + realm: master + client_id: "" # Update with value from generate-token.sh + client_secret: "" # Update with value from generate-token.sh + discovery: http://keycloak.default.svc.cluster.local/realms/master/.well-known/openid-configuration + introspection_endpoint: http://keycloak.default.svc.cluster.local/realms/master/protocol/openid-connect/token/introspect + +# Optional extra secrets +secrets: {} + # enabled: true + # data: + # key1: value1 + # key2: value2 + +# Network Policy +networkPolicy: + enabled: false + policyTypes: + - Ingress + - Egress + ingress: [] + egress: [] + +# Priority class +priorityClassName: "" + +# Termination grace period +terminationGracePeriodSeconds: 30 diff --git a/core/helm-charts/observability/habana-exporter/habana-metrics.yml b/core/helm-charts/observability/habana-exporter/habana-metrics.yml index 34fadd59..a33184ca 100644 --- a/core/helm-charts/observability/habana-exporter/habana-metrics.yml +++ b/core/helm-charts/observability/habana-exporter/habana-metrics.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 
Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: apps/v1 kind: DaemonSet diff --git a/core/helm-charts/observability/logs-stack/Chart.yaml b/core/helm-charts/observability/logs-stack/Chart.yaml index 321791b9..02e974d8 100644 --- a/core/helm-charts/observability/logs-stack/Chart.yaml +++ b/core/helm-charts/observability/logs-stack/Chart.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v2 name: logs diff --git a/core/helm-charts/observability/logs-stack/aws-s3-values.yaml b/core/helm-charts/observability/logs-stack/aws-s3-values.yaml index 6d2b6e36..d34e8362 100644 --- a/core/helm-charts/observability/logs-stack/aws-s3-values.yaml +++ b/core/helm-charts/observability/logs-stack/aws-s3-values.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 otelcol-logs: enabled: true diff --git a/core/helm-charts/observability/logs-stack/templates/grafana-datasources-loki.yaml b/core/helm-charts/observability/logs-stack/templates/grafana-datasources-loki.yaml index 54ff5680..8f73e18e 100644 --- a/core/helm-charts/observability/logs-stack/templates/grafana-datasources-loki.yaml +++ b/core/helm-charts/observability/logs-stack/templates/grafana-datasources-loki.yaml @@ -1,5 +1,5 @@ {{- if .Values.loki.enabled }} -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 kind: ConfigMap diff --git a/core/helm-charts/observability/logs-stack/values.yaml b/core/helm-charts/observability/logs-stack/values.yaml index 4b3afe6f..66bf2d2f 100644 --- a/core/helm-charts/observability/logs-stack/values.yaml +++ b/core/helm-charts/observability/logs-stack/values.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 
2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 otelcol-logs: enabled: true diff --git a/core/helm-charts/ovms/Chart.yaml b/core/helm-charts/ovms/Chart.yaml index 262fbbb3..514230e3 100644 --- a/core/helm-charts/ovms/Chart.yaml +++ b/core/helm-charts/ovms/Chart.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v2 diff --git a/core/helm-charts/ovms/templates/_helpers.tpl b/core/helm-charts/ovms/templates/_helpers.tpl index a2d72e13..69279a05 100644 --- a/core/helm-charts/ovms/templates/_helpers.tpl +++ b/core/helm-charts/ovms/templates/_helpers.tpl @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{/* diff --git a/core/helm-charts/ovms/templates/apisixroute.yaml b/core/helm-charts/ovms/templates/apisixroute.yaml index 74cf50a5..78592e62 100644 --- a/core/helm-charts/ovms/templates/apisixroute.yaml +++ b/core/helm-charts/ovms/templates/apisixroute.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if and .Values.apisixRoute.enabled .Values.modelSource }} diff --git a/core/helm-charts/ovms/templates/deployment.yaml b/core/helm-charts/ovms/templates/deployment.yaml index 28776948..8306e163 100644 --- a/core/helm-charts/ovms/templates/deployment.yaml +++ b/core/helm-charts/ovms/templates/deployment.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if and .Values.modelSource (not (hasPrefix "OpenVINO/" .Values.modelSource)) (not .Values.huggingface.token) }} diff --git a/core/helm-charts/ovms/templates/ingress.yaml b/core/helm-charts/ovms/templates/ingress.yaml index cb8ea085..c5f0865e 100644 --- a/core/helm-charts/ovms/templates/ingress.yaml +++ 
b/core/helm-charts/ovms/templates/ingress.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if and .Values.ingress.enabled .Values.modelSource }} diff --git a/core/helm-charts/ovms/templates/pvc.yaml b/core/helm-charts/ovms/templates/pvc.yaml index e9ead3d4..00682aef 100644 --- a/core/helm-charts/ovms/templates/pvc.yaml +++ b/core/helm-charts/ovms/templates/pvc.yaml @@ -1,25 +1,25 @@ -# Copyright (C) 2024-2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -{{- if and .Values.storage.persistentVolume.enabled (not .Values.storage.persistentVolume.existingClaim) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "ovms-model-server.fullname" . }} - namespace: {{ .Values.namespace }} - labels: - {{- include "ovms-model-server.labels" . | nindent 4 }} - {{- if not .Values.storage.persistentVolume.deleteOnUninstall }} - annotations: - "helm.sh/resource-policy": keep - {{- end }} -spec: - accessModes: - - {{ .Values.storage.persistentVolume.accessMode | default "ReadWriteOnce" }} - {{- if .Values.storage.persistentVolume.storageClass }} - storageClassName: {{ .Values.storage.persistentVolume.storageClass }} - {{- end }} - resources: - requests: - storage: {{ .Values.storage.persistentVolume.size | default "50Gi" }} -{{- end }} +# Copyright (C) 2025-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if and .Values.storage.persistentVolume.enabled (not .Values.storage.persistentVolume.existingClaim) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "ovms-model-server.fullname" . }} + namespace: {{ .Values.namespace }} + labels: + {{- include "ovms-model-server.labels" . 
| nindent 4 }} + {{- if not .Values.storage.persistentVolume.deleteOnUninstall }} + annotations: + "helm.sh/resource-policy": keep + {{- end }} +spec: + accessModes: + - {{ .Values.storage.persistentVolume.accessMode | default "ReadWriteOnce" }} + {{- if .Values.storage.persistentVolume.storageClass }} + storageClassName: {{ .Values.storage.persistentVolume.storageClass }} + {{- end }} + resources: + requests: + storage: {{ .Values.storage.persistentVolume.size | default "50Gi" }} +{{- end }} diff --git a/core/helm-charts/ovms/templates/secret.yaml b/core/helm-charts/ovms/templates/secret.yaml index 4546670f..0c7e64e1 100644 --- a/core/helm-charts/ovms/templates/secret.yaml +++ b/core/helm-charts/ovms/templates/secret.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if or .Values.oidc.enabled .Values.secrets.enabled }} diff --git a/core/helm-charts/ovms/templates/service.yaml b/core/helm-charts/ovms/templates/service.yaml index 814b8927..749dc795 100644 --- a/core/helm-charts/ovms/templates/service.yaml +++ b/core/helm-charts/ovms/templates/service.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 diff --git a/core/helm-charts/ovms/values.yaml b/core/helm-charts/ovms/values.yaml index 11adf9d0..5e29d543 100644 --- a/core/helm-charts/ovms/values.yaml +++ b/core/helm-charts/ovms/values.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Default values for ovms-model-server Helm chart diff --git a/core/helm-charts/tei/Chart.yaml b/core/helm-charts/tei/Chart.yaml index 908f5d3c..fe079e13 100644 --- a/core/helm-charts/tei/Chart.yaml +++ b/core/helm-charts/tei/Chart.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 
Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v2 diff --git a/core/helm-charts/tei/ci-gaudi-values.yaml b/core/helm-charts/tei/ci-gaudi-values.yaml index c09db7a0..b55b5bb3 100644 --- a/core/helm-charts/tei/ci-gaudi-values.yaml +++ b/core/helm-charts/tei/ci-gaudi-values.yaml @@ -1,3 +1,3 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 gaudi-values.yaml diff --git a/core/helm-charts/tei/ci-values.yaml b/core/helm-charts/tei/ci-values.yaml index b0acdaf3..8211daac 100644 --- a/core/helm-charts/tei/ci-values.yaml +++ b/core/helm-charts/tei/ci-values.yaml @@ -1,3 +1,3 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 values.yaml diff --git a/core/helm-charts/tei/gaudi-values.yaml b/core/helm-charts/tei/gaudi-values.yaml index edd42f78..f17b4d13 100644 --- a/core/helm-charts/tei/gaudi-values.yaml +++ b/core/helm-charts/tei/gaudi-values.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Default values for tei. 
@@ -20,6 +20,15 @@ resources: limits: habana.ai/gaudi: 1 +affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: ei-inference-eligible + operator: In + values: ["true"] + livenessProbe: timeoutSeconds: 1 readinessProbe: diff --git a/core/helm-charts/tei/ingress_eks.yaml b/core/helm-charts/tei/ingress_eks.yaml index b0eb1061..30739f44 100644 --- a/core/helm-charts/tei/ingress_eks.yaml +++ b/core/helm-charts/tei/ingress_eks.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if or .Values.ingress.enabled .Values.apisix.enabled }} apiVersion: networking.k8s.io/v1 diff --git a/core/helm-charts/tei/templates/apisixroutes.yaml b/core/helm-charts/tei/templates/apisixroutes.yaml index 63f850a4..3b9f00c5 100644 --- a/core/helm-charts/tei/templates/apisixroutes.yaml +++ b/core/helm-charts/tei/templates/apisixroutes.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.apisix.enabled }} apiVersion: apisix.apache.org/v2 diff --git a/core/helm-charts/tei/templates/configmap.yaml b/core/helm-charts/tei/templates/configmap.yaml index a3cd83ca..fc87b8ea 100644 --- a/core/helm-charts/tei/templates/configmap.yaml +++ b/core/helm-charts/tei/templates/configmap.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 diff --git a/core/helm-charts/tei/templates/deployment.yaml b/core/helm-charts/tei/templates/deployment.yaml index 0ca55a97..c3378b63 100644 --- a/core/helm-charts/tei/templates/deployment.yaml +++ b/core/helm-charts/tei/templates/deployment.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 
apiVersion: apps/v1 diff --git a/core/helm-charts/tei/templates/ingress.yaml b/core/helm-charts/tei/templates/ingress.yaml index 9f817461..aba43f9a 100644 --- a/core/helm-charts/tei/templates/ingress.yaml +++ b/core/helm-charts/tei/templates/ingress.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Please edit the object below. Lines beginning with a '#' will be ignored, # and an empty file will abort the edit. If an error occurs while saving this file will be diff --git a/core/helm-charts/tei/templates/ingress_eks.yaml b/core/helm-charts/tei/templates/ingress_eks.yaml index f2537a85..462c767e 100644 --- a/core/helm-charts/tei/templates/ingress_eks.yaml +++ b/core/helm-charts/tei/templates/ingress_eks.yaml @@ -1,15 +1,21 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if or .Values.ingress.enabled .Values.apisix.enabled }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: name: {{ include "tei.fullname" . 
}}-ingress - namespace: {{ .Values.ingress.namespace }} + namespace: {{- if .Values.apisix.enabled }} + auth-apisix + {{- else }} + {{ .Values.ingress.namespace }} + {{- end }} annotations: alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_301"}}' - alb.ingress.kubernetes.io/certificate-arn: "{{ default "" .Values.aws_certificate_arn }}" - alb.ingress.kubernetes.io/group.name: keycloak-apisix + {{- if .Values.aws_certificate_arn }} + alb.ingress.kubernetes.io/certificate-arn: '{{ .Values.aws_certificate_arn }}' + {{- end }} + alb.ingress.kubernetes.io/group.name: ei-eks alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]' alb.ingress.kubernetes.io/scheme: internet-facing alb.ingress.kubernetes.io/target-type: ip diff --git a/core/helm-charts/tei/templates/pvc.yaml b/core/helm-charts/tei/templates/pvc.yaml index d122d94e..3f855b7f 100644 --- a/core/helm-charts/tei/templates/pvc.yaml +++ b/core/helm-charts/tei/templates/pvc.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.pvc.enabled }} apiVersion: v1 diff --git a/core/helm-charts/tei/templates/route.yaml b/core/helm-charts/tei/templates/route.yaml index 612370c8..91e34c12 100644 --- a/core/helm-charts/tei/templates/route.yaml +++ b/core/helm-charts/tei/templates/route.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.route.enabled }} apiVersion: route.openshift.io/v1 diff --git a/core/helm-charts/tei/templates/secret.yaml b/core/helm-charts/tei/templates/secret.yaml index f9ccca34..792e446d 100644 --- a/core/helm-charts/tei/templates/secret.yaml +++ b/core/helm-charts/tei/templates/secret.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 
2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 kind: Secret diff --git a/core/helm-charts/tei/templates/service.yaml b/core/helm-charts/tei/templates/service.yaml index b4b26558..631cc60a 100644 --- a/core/helm-charts/tei/templates/service.yaml +++ b/core/helm-charts/tei/templates/service.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 diff --git a/core/helm-charts/tei/templates/servicemonitor.yaml b/core/helm-charts/tei/templates/servicemonitor.yaml index 87e3ab53..5dc2a718 100644 --- a/core/helm-charts/tei/templates/servicemonitor.yaml +++ b/core/helm-charts/tei/templates/servicemonitor.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.global.monitoring }} diff --git a/core/helm-charts/tei/values.yaml b/core/helm-charts/tei/values.yaml index 373cd159..90357503 100644 --- a/core/helm-charts/tei/values.yaml +++ b/core/helm-charts/tei/values.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Default values for tei. 
@@ -143,17 +143,9 @@ affinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-inference-eligible operator: In - values: ["inference"] - - matchExpressions: - - key: role - operator: In - values: ["inference-xeon"] - - matchExpressions: - - key: role - operator: In - values: ["inference-cpu"] + values: ["true"] global: http_proxy: "" diff --git a/core/helm-charts/teirerank/Chart.yaml b/core/helm-charts/teirerank/Chart.yaml index b3fd624f..7c6009ec 100644 --- a/core/helm-charts/teirerank/Chart.yaml +++ b/core/helm-charts/teirerank/Chart.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v2 diff --git a/core/helm-charts/teirerank/ci-gaudi-values.yaml b/core/helm-charts/teirerank/ci-gaudi-values.yaml index c09db7a0..b55b5bb3 100644 --- a/core/helm-charts/teirerank/ci-gaudi-values.yaml +++ b/core/helm-charts/teirerank/ci-gaudi-values.yaml @@ -1,3 +1,3 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 gaudi-values.yaml diff --git a/core/helm-charts/teirerank/ci-values.yaml b/core/helm-charts/teirerank/ci-values.yaml index b0acdaf3..8211daac 100644 --- a/core/helm-charts/teirerank/ci-values.yaml +++ b/core/helm-charts/teirerank/ci-values.yaml @@ -1,3 +1,3 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 values.yaml diff --git a/core/helm-charts/teirerank/gaudi-values.yaml b/core/helm-charts/teirerank/gaudi-values.yaml index 39275cbe..4972199b 100644 --- a/core/helm-charts/teirerank/gaudi-values.yaml +++ b/core/helm-charts/teirerank/gaudi-values.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Default values for teirerank. 
@@ -25,13 +25,9 @@ affinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-inference-eligible operator: In - values: ["inference"] - - matchExpressions: - - key: role - operator: In - values: ["inference-gaudi"] + values: ["true"] livenessProbe: timeoutSeconds: 1 diff --git a/core/helm-charts/teirerank/templates/apisixroutes.yaml b/core/helm-charts/teirerank/templates/apisixroutes.yaml index fd83b0c9..d04d2a0b 100644 --- a/core/helm-charts/teirerank/templates/apisixroutes.yaml +++ b/core/helm-charts/teirerank/templates/apisixroutes.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.apisix.enabled }} apiVersion: apisix.apache.org/v2 diff --git a/core/helm-charts/teirerank/templates/configmap.yaml b/core/helm-charts/teirerank/templates/configmap.yaml index 968f6a13..8aaad1b8 100644 --- a/core/helm-charts/teirerank/templates/configmap.yaml +++ b/core/helm-charts/teirerank/templates/configmap.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 kind: ConfigMap diff --git a/core/helm-charts/teirerank/templates/deployment.yaml b/core/helm-charts/teirerank/templates/deployment.yaml index 435c0185..89269724 100644 --- a/core/helm-charts/teirerank/templates/deployment.yaml +++ b/core/helm-charts/teirerank/templates/deployment.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: apps/v1 diff --git a/core/helm-charts/teirerank/templates/ingress.yaml b/core/helm-charts/teirerank/templates/ingress.yaml index e3f712c3..402d05ab 100644 --- a/core/helm-charts/teirerank/templates/ingress.yaml +++ b/core/helm-charts/teirerank/templates/ingress.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 
Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Please edit the object below. Lines beginning with a '#' will be ignored, diff --git a/core/helm-charts/teirerank/templates/ingress_eks.yaml b/core/helm-charts/teirerank/templates/ingress_eks.yaml index 16343c7c..202afd6d 100644 --- a/core/helm-charts/teirerank/templates/ingress_eks.yaml +++ b/core/helm-charts/teirerank/templates/ingress_eks.yaml @@ -1,15 +1,21 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.ingress.enabled }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: name: {{ include "teirerank.fullname" . }}-ingress - namespace: {{ .Values.ingress.namespace }} + namespace: {{- if .Values.apisix.enabled }} + auth-apisix + {{- else }} + {{ .Values.ingress.namespace }} + {{- end }} annotations: alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_301"}}' - alb.ingress.kubernetes.io/certificate-arn: "{{ default "" .Values.aws_certificate_arn }}" - alb.ingress.kubernetes.io/group.name: keycloak-apisix + {{- if .Values.aws_certificate_arn }} + alb.ingress.kubernetes.io/certificate-arn: '{{ .Values.aws_certificate_arn }}' + {{- end }} + alb.ingress.kubernetes.io/group.name: ei-eks alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]' alb.ingress.kubernetes.io/scheme: internet-facing alb.ingress.kubernetes.io/target-type: ip diff --git a/core/helm-charts/teirerank/templates/pvc.yaml b/core/helm-charts/teirerank/templates/pvc.yaml index 34cc0db2..94e014ff 100644 --- a/core/helm-charts/teirerank/templates/pvc.yaml +++ b/core/helm-charts/teirerank/templates/pvc.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.pvc.enabled }} diff --git 
a/core/helm-charts/teirerank/templates/route.yaml b/core/helm-charts/teirerank/templates/route.yaml index 55185712..590aacd8 100644 --- a/core/helm-charts/teirerank/templates/route.yaml +++ b/core/helm-charts/teirerank/templates/route.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.route.enabled }} apiVersion: route.openshift.io/v1 diff --git a/core/helm-charts/teirerank/templates/secret.yaml b/core/helm-charts/teirerank/templates/secret.yaml index 27e3f7d9..da53b589 100644 --- a/core/helm-charts/teirerank/templates/secret.yaml +++ b/core/helm-charts/teirerank/templates/secret.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 kind: Secret diff --git a/core/helm-charts/teirerank/templates/service.yaml b/core/helm-charts/teirerank/templates/service.yaml index 3d3f7f56..e9f5317d 100644 --- a/core/helm-charts/teirerank/templates/service.yaml +++ b/core/helm-charts/teirerank/templates/service.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 diff --git a/core/helm-charts/teirerank/templates/servicemonitor.yaml b/core/helm-charts/teirerank/templates/servicemonitor.yaml index cbd66405..8074cf83 100644 --- a/core/helm-charts/teirerank/templates/servicemonitor.yaml +++ b/core/helm-charts/teirerank/templates/servicemonitor.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.global.monitoring }} diff --git a/core/helm-charts/teirerank/values.yaml b/core/helm-charts/teirerank/values.yaml index a5f87689..82909535 100644 --- a/core/helm-charts/teirerank/values.yaml +++ b/core/helm-charts/teirerank/values.yaml @@ -1,4 +1,4 @@ -# 
Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Default values for teirerank. @@ -148,17 +148,9 @@ affinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-inference-eligible operator: In - values: ["inference"] - - matchExpressions: - - key: role - operator: In - values: ["inference-xeon"] - - matchExpressions: - - key: role - operator: In - values: ["inference-cpu"] + values: ["true"] global: http_proxy: "" diff --git a/core/helm-charts/tgi/Chart.yaml b/core/helm-charts/tgi/Chart.yaml index 95dd5130..0c288f6f 100644 --- a/core/helm-charts/tgi/Chart.yaml +++ b/core/helm-charts/tgi/Chart.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v2 diff --git a/core/helm-charts/tgi/ci-gaudi-values.yaml b/core/helm-charts/tgi/ci-gaudi-values.yaml index c09db7a0..b55b5bb3 100644 --- a/core/helm-charts/tgi/ci-gaudi-values.yaml +++ b/core/helm-charts/tgi/ci-gaudi-values.yaml @@ -1,3 +1,3 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 gaudi-values.yaml diff --git a/core/helm-charts/tgi/ci-values.yaml b/core/helm-charts/tgi/ci-values.yaml index b0acdaf3..8211daac 100644 --- a/core/helm-charts/tgi/ci-values.yaml +++ b/core/helm-charts/tgi/ci-values.yaml @@ -1,3 +1,3 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 values.yaml diff --git a/core/helm-charts/tgi/gaudi-values.yaml b/core/helm-charts/tgi/gaudi-values.yaml index dd95fa79..487cecff 100644 --- a/core/helm-charts/tgi/gaudi-values.yaml +++ b/core/helm-charts/tgi/gaudi-values.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: 
Apache-2.0 # Default values for tgi. @@ -31,13 +31,9 @@ affinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-inference-eligible operator: In - values: ["inference"] - - matchExpressions: - - key: role - operator: In - values: ["inference-gaudi"] + values: ["true"] livenessProbe: initialDelaySeconds: 5 diff --git a/core/helm-charts/tgi/templates/apisixroutes.yaml b/core/helm-charts/tgi/templates/apisixroutes.yaml index 2d5ad8a1..c9b3180b 100644 --- a/core/helm-charts/tgi/templates/apisixroutes.yaml +++ b/core/helm-charts/tgi/templates/apisixroutes.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.apisix.enabled }} apiVersion: apisix.apache.org/v2 diff --git a/core/helm-charts/tgi/templates/configmap.yaml b/core/helm-charts/tgi/templates/configmap.yaml index 9cdb6275..35fcd1e6 100644 --- a/core/helm-charts/tgi/templates/configmap.yaml +++ b/core/helm-charts/tgi/templates/configmap.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 diff --git a/core/helm-charts/tgi/templates/deployment.yaml b/core/helm-charts/tgi/templates/deployment.yaml index 46377d52..64b886bf 100644 --- a/core/helm-charts/tgi/templates/deployment.yaml +++ b/core/helm-charts/tgi/templates/deployment.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: apps/v1 diff --git a/core/helm-charts/tgi/templates/ingress.yaml b/core/helm-charts/tgi/templates/ingress.yaml index 2c91016c..2e1deac1 100644 --- a/core/helm-charts/tgi/templates/ingress.yaml +++ b/core/helm-charts/tgi/templates/ingress.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # 
SPDX-License-Identifier: Apache-2.0 # Please edit the object below. Lines beginning with a '#' will be ignored, # and an empty file will abort the edit. If an error occurs while saving this file will be diff --git a/core/helm-charts/tgi/templates/ingress_eks.yaml b/core/helm-charts/tgi/templates/ingress_eks.yaml index 0ea9a4be..d0cddf64 100644 --- a/core/helm-charts/tgi/templates/ingress_eks.yaml +++ b/core/helm-charts/tgi/templates/ingress_eks.yaml @@ -1,15 +1,21 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.ingress.enabled }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: name: {{ include "tgi.fullname" . }}-ingress - namespace: {{ .Values.ingress.namespace }} + namespace: {{- if .Values.apisix.enabled }} + auth-apisix + {{- else }} + {{ .Values.ingress.namespace }} + {{- end }} annotations: alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_301"}}' - alb.ingress.kubernetes.io/certificate-arn: "{{ default "" .Values.aws_certificate_arn }}" - alb.ingress.kubernetes.io/group.name: keycloak-apisix + {{- if .Values.aws_certificate_arn }} + alb.ingress.kubernetes.io/certificate-arn: '{{ .Values.aws_certificate_arn }}' + {{- end }} + alb.ingress.kubernetes.io/group.name: ei-eks alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]' alb.ingress.kubernetes.io/scheme: internet-facing alb.ingress.kubernetes.io/target-type: ip diff --git a/core/helm-charts/tgi/templates/pvc.yaml b/core/helm-charts/tgi/templates/pvc.yaml index 24680d8d..c7abf925 100644 --- a/core/helm-charts/tgi/templates/pvc.yaml +++ b/core/helm-charts/tgi/templates/pvc.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.pvc.enabled }} apiVersion: v1 diff --git 
a/core/helm-charts/tgi/templates/route.yaml b/core/helm-charts/tgi/templates/route.yaml index bf260fd6..b3963777 100644 --- a/core/helm-charts/tgi/templates/route.yaml +++ b/core/helm-charts/tgi/templates/route.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.route.enabled }} {{- $modelName := (default .Values.LLM_MODEL_ID .Values.SERVED_MODEL_NAME) | splitList "/" | last }} diff --git a/core/helm-charts/tgi/templates/secret.yaml b/core/helm-charts/tgi/templates/secret.yaml index 9e238c26..2feff12e 100644 --- a/core/helm-charts/tgi/templates/secret.yaml +++ b/core/helm-charts/tgi/templates/secret.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 kind: Secret diff --git a/core/helm-charts/tgi/templates/service.yaml b/core/helm-charts/tgi/templates/service.yaml index 49ad5d81..b7743667 100644 --- a/core/helm-charts/tgi/templates/service.yaml +++ b/core/helm-charts/tgi/templates/service.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 diff --git a/core/helm-charts/tgi/templates/servicemonitor.yaml b/core/helm-charts/tgi/templates/servicemonitor.yaml index 8c0bfdbe..621b809a 100644 --- a/core/helm-charts/tgi/templates/servicemonitor.yaml +++ b/core/helm-charts/tgi/templates/servicemonitor.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # # Dashboard for the exposed TGI metrics: diff --git a/core/helm-charts/tgi/values.yaml b/core/helm-charts/tgi/values.yaml index 7cb196cf..9dea2a7f 100644 --- a/core/helm-charts/tgi/values.yaml +++ b/core/helm-charts/tgi/values.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# 
Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Default values for tgi. @@ -165,17 +165,9 @@ affinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-inference-eligible operator: In - values: ["inference"] - - matchExpressions: - - key: role - operator: In - values: ["inference-xeon"] - - matchExpressions: - - key: role - operator: In - values: ["inference-cpu"] + values: ["true"] LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 diff --git a/core/helm-charts/vllm/Chart.yaml b/core/helm-charts/vllm/Chart.yaml index 94c3a8b4..c76f5229 100644 --- a/core/helm-charts/vllm/Chart.yaml +++ b/core/helm-charts/vllm/Chart.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v2 diff --git a/core/helm-charts/vllm/ci-gaudi-values.yaml b/core/helm-charts/vllm/ci-gaudi-values.yaml index c09db7a0..b55b5bb3 100644 --- a/core/helm-charts/vllm/ci-gaudi-values.yaml +++ b/core/helm-charts/vllm/ci-gaudi-values.yaml @@ -1,3 +1,3 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 gaudi-values.yaml diff --git a/core/helm-charts/vllm/ci-values.yaml b/core/helm-charts/vllm/ci-values.yaml index b0acdaf3..8211daac 100644 --- a/core/helm-charts/vllm/ci-values.yaml +++ b/core/helm-charts/vllm/ci-values.yaml @@ -1,3 +1,3 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 values.yaml diff --git a/core/helm-charts/vllm/gaudi-values.yaml b/core/helm-charts/vllm/gaudi-values.yaml index c3155455..1c8abe91 100644 --- a/core/helm-charts/vllm/gaudi-values.yaml +++ b/core/helm-charts/vllm/gaudi-values.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # 
User-configurable parameters (can be set via --set during helm install) @@ -21,13 +21,9 @@ affinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-inference-eligible operator: In - values: ["inference"] - - matchExpressions: - - key: role - operator: In - values: ["inference-gaudi"] + values: ["true"] runtime: "habana" diff --git a/core/helm-charts/vllm/gaudi3-values.yaml b/core/helm-charts/vllm/gaudi3-values.yaml index 8283aa72..4a5443bd 100644 --- a/core/helm-charts/vllm/gaudi3-values.yaml +++ b/core/helm-charts/vllm/gaudi3-values.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # User-configurable parameters (can be set via --set during helm install) @@ -20,13 +20,9 @@ affinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-inference-eligible operator: In - values: ["inference"] - - matchExpressions: - - key: role - operator: In - values: ["inference-gaudi"] + values: ["true"] runtime: "habana" HABANA_VISIBLE_DEVICES: "all" diff --git a/core/helm-charts/vllm/templates/apisixroutes.yaml b/core/helm-charts/vllm/templates/apisixroutes.yaml index 03ca149f..143e0eee 100644 --- a/core/helm-charts/vllm/templates/apisixroutes.yaml +++ b/core/helm-charts/vllm/templates/apisixroutes.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.apisix.enabled }} {{- $modelName := (default .Values.LLM_MODEL_ID .Values.SERVED_MODEL_NAME) | splitList "/" | last }} @@ -33,7 +33,7 @@ spec: secretRef: {{ include "vllm.fullname" . 
}}-secret config: discovery: {{ .Values.oidc.discovery }} - {{- if eq .Values.platform "openshift" }} + {{- if or (eq .Values.platform "openshift") (eq .Values.platform "eks") }} use_jwks: {{ .Values.oidc.use_jwks }} {{- else }} introspection_endpoint: {{ .Values.oidc.introspection_endpoint }} diff --git a/core/helm-charts/vllm/templates/configmap.yaml b/core/helm-charts/vllm/templates/configmap.yaml index 5e3bf02d..3188c752 100644 --- a/core/helm-charts/vllm/templates/configmap.yaml +++ b/core/helm-charts/vllm/templates/configmap.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- $modelName := (default .Values.LLM_MODEL_ID .Values.SERVED_MODEL_NAME) }} apiVersion: v1 diff --git a/core/helm-charts/vllm/templates/deployment.yaml b/core/helm-charts/vllm/templates/deployment.yaml index 40e7c830..660e6813 100644 --- a/core/helm-charts/vllm/templates/deployment.yaml +++ b/core/helm-charts/vllm/templates/deployment.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- $modelName := (default .Values.LLM_MODEL_ID .Values.SERVED_MODEL_NAME) }} apiVersion: apps/v1 @@ -23,9 +23,6 @@ spec: balloon.balloons.resource-policy.nri.io: {{ .Values.cpu_balloon_annotation | quote }} {{- end }} {{- end }} - {{- with .Values.podAnnotations }} - {{- toYaml . | nindent 8 }} - {{- end }} labels: {{- include "vllm.selectorLabels" . 
| nindent 8 }} {{- if not .Values.accelDevice }} @@ -168,4 +165,4 @@ spec: {{- if not .Values.accelDevice }} # extra time to finish processing buffered requests on CPU before pod is forcibly terminated terminationGracePeriodSeconds: 120 - {{- end }} \ No newline at end of file + {{- end }} diff --git a/core/helm-charts/vllm/templates/ingress.yaml b/core/helm-charts/vllm/templates/ingress.yaml index f2d9623c..5ef5caf5 100644 --- a/core/helm-charts/vllm/templates/ingress.yaml +++ b/core/helm-charts/vllm/templates/ingress.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Please edit the object below. Lines beginning with a '#' will be ignored, # and an empty file will abort the edit. If an error occurs while saving this file will be diff --git a/core/helm-charts/vllm/templates/ingress_eks.yaml b/core/helm-charts/vllm/templates/ingress_eks.yaml index 7f52a44d..51710c16 100644 --- a/core/helm-charts/vllm/templates/ingress_eks.yaml +++ b/core/helm-charts/vllm/templates/ingress_eks.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.ingress.enabled }} {{- $modelName := (default .Values.LLM_MODEL_ID .Values.SERVED_MODEL_NAME) | splitList "/" | last }} @@ -6,15 +6,20 @@ apiVersion: networking.k8s.io/v1 kind: Ingress metadata: name: {{ include "vllm.fullname" . 
}}-ingress - namespace: {{ .Values.ingress.namespace }} + namespace: {{- if .Values.apisix.enabled }} + auth-apisix + {{- else }} + {{ .Values.ingress.namespace }} + {{- end }} annotations: alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_301"}}' - alb.ingress.kubernetes.io/certificate-arn: "{{ default "" .Values.aws_certificate_arn }}" - alb.ingress.kubernetes.io/group.name: keycloak-apisix + {{- if .Values.aws_certificate_arn }} + alb.ingress.kubernetes.io/certificate-arn: '{{ .Values.aws_certificate_arn }}' + {{- end }} + alb.ingress.kubernetes.io/group.name: ei-eks alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]' alb.ingress.kubernetes.io/scheme: internet-facing alb.ingress.kubernetes.io/target-type: ip - alb.ingress.kubernetes.io/group.order: '2' spec: ingressClassName: alb rules: diff --git a/core/helm-charts/vllm/templates/pvc.yaml b/core/helm-charts/vllm/templates/pvc.yaml index 57d4402a..73b50343 100644 --- a/core/helm-charts/vllm/templates/pvc.yaml +++ b/core/helm-charts/vllm/templates/pvc.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if and .Values.pvc.enabled .Release.IsInstall }} apiVersion: v1 diff --git a/core/helm-charts/vllm/templates/route.yaml b/core/helm-charts/vllm/templates/route.yaml index dcc90fd1..4ed0f65b 100644 --- a/core/helm-charts/vllm/templates/route.yaml +++ b/core/helm-charts/vllm/templates/route.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.route.enabled }} {{- $modelName := (default .Values.LLM_MODEL_ID .Values.SERVED_MODEL_NAME) | splitList "/" | last }} diff --git a/core/helm-charts/vllm/templates/secret.yaml b/core/helm-charts/vllm/templates/secret.yaml index 811b9c02..58226c5c 100644 --- 
a/core/helm-charts/vllm/templates/secret.yaml +++ b/core/helm-charts/vllm/templates/secret.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 kind: Secret diff --git a/core/helm-charts/vllm/templates/service.yaml b/core/helm-charts/vllm/templates/service.yaml index 71c8700c..9e6bbf26 100644 --- a/core/helm-charts/vllm/templates/service.yaml +++ b/core/helm-charts/vllm/templates/service.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 apiVersion: v1 diff --git a/core/helm-charts/vllm/templates/servicemonitor.yaml b/core/helm-charts/vllm/templates/servicemonitor.yaml index 25e044ea..6367d98f 100644 --- a/core/helm-charts/vllm/templates/servicemonitor.yaml +++ b/core/helm-charts/vllm/templates/servicemonitor.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.svcmonitor.enabled }} apiVersion: monitoring.coreos.com/v1 diff --git a/core/helm-charts/vllm/values.yaml b/core/helm-charts/vllm/values.yaml index f8b309b3..3031bc9a 100644 --- a/core/helm-charts/vllm/values.yaml +++ b/core/helm-charts/vllm/values.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Default values for vllm. 
@@ -152,17 +152,9 @@ affinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-inference-eligible operator: In - values: ["inference"] - - matchExpressions: - - key: role - operator: In - values: ["inference-xeon"] - - matchExpressions: - - key: role - operator: In - values: ["inference-cpu"] + values: ["true"] LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 SERVED_MODEL_NAME: "" diff --git a/core/helm-charts/vllm/xeon-values.yaml b/core/helm-charts/vllm/xeon-values.yaml index 00cb6384..ec58c8e5 100644 --- a/core/helm-charts/vllm/xeon-values.yaml +++ b/core/helm-charts/vllm/xeon-values.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Xeon CPU-optimized override values for vLLM deployments @@ -17,7 +17,6 @@ resources: cpu: "{{ .Values.cpu }}" memory: "{{ .Values.memory }}" - # CPU-specific configurations block_size: 128 max_num_seqs: 256 @@ -37,17 +36,9 @@ affinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role - operator: In - values: ["inference"] - - matchExpressions: - - key: role - operator: In - values: ["inference-xeon"] - - matchExpressions: - - key: role + - key: ei-inference-eligible operator: In - values: ["inference-cpu"] + values: ["true"] # vLLM CPU environment variables VLLM_CPU_SGL_KERNEL: "1" @@ -270,4 +261,4 @@ defaultModelConfigs: "256", ] tensor_parallel_size: "{{ .Values.tensor_parallel_size }}" - pipeline_parallel_size: "{{ .Values.pipeline_parallel_size }}" \ No newline at end of file + pipeline_parallel_size: "{{ .Values.pipeline_parallel_size }}" diff --git a/core/inference-stack-deploy.sh b/core/inference-stack-deploy.sh index d3936f92..148ae5b8 100644 --- a/core/inference-stack-deploy.sh +++ b/core/inference-stack-deploy.sh @@ -8,7 +8,7 @@ CYAN=$(tput setaf 6) NC=$(tput sgr0) -# Copyright (C) 2024-2025 Intel Corporation +# 
Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Permission is granted for recipient to internally use and modify this software for purposes of benchmarking and testing on Intel architectures. diff --git a/core/inventory/inference-config.cfg b/core/inventory/inference-config.cfg index 57b8591d..e63552d4 100644 --- a/core/inventory/inference-config.cfg +++ b/core/inventory/inference-config.cfg @@ -17,4 +17,7 @@ deploy_observability=off deploy_llm_models=on deploy_ceph=off deploy_istio=off -uninstall_ceph=off \ No newline at end of file +uninstall_ceph=off + +# Agentic AI Plugin +deploy_agenticai_plugin=off \ No newline at end of file diff --git a/core/inventory/metadata/addons.yml b/core/inventory/metadata/addons.yml index c4e07d6a..ca6424ca 100644 --- a/core/inventory/metadata/addons.yml +++ b/core/inventory/metadata/addons.yml @@ -1,5 +1,5 @@ --- -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Kubernetes dashboard # RBAC required. see docs/getting-started.md for access details. 
diff --git a/core/inventory/metadata/all.yml b/core/inventory/metadata/all.yml index 51f8ed4f..0c8d7744 100644 --- a/core/inventory/metadata/all.yml +++ b/core/inventory/metadata/all.yml @@ -1,5 +1,5 @@ --- -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 ## Directory where the binaries will be installed bin_dir: /usr/local/bin diff --git a/core/inventory/metadata/vars/inference_common.yml b/core/inventory/metadata/vars/inference_common.yml index 48cf6a5a..99f97770 100644 --- a/core/inventory/metadata/vars/inference_common.yml +++ b/core/inventory/metadata/vars/inference_common.yml @@ -1,7 +1,7 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 helm_charts_base: "{{ lookup('env', 'PWD') }}/helm-charts" remote_home_dir: "{{ lookup('env', 'PWD') }}/scripts" remote_helm_charts_base: "/tmp/helm-charts" -ansible_python_interpreter: /usr/bin/python3 +ansible_python_interpreter: "{{ lookup('env', 'ANSIBLE_PYTHON_INTERPRETER') or '/usr/bin/python3' }}" remote_home_scripts_dir: "{{ lookup('env', 'PWD') }}/scripts" \ No newline at end of file diff --git a/core/inventory/metadata/vars/inference_delegate.yml b/core/inventory/metadata/vars/inference_delegate.yml index d554648f..3d0a9db8 100644 --- a/core/inventory/metadata/vars/inference_delegate.yml +++ b/core/inventory/metadata/vars/inference_delegate.yml @@ -1,3 +1,3 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 inference_delegate: kube_control_plane \ No newline at end of file diff --git a/core/inventory/metadata/vars/inference_genai_gateway.yml b/core/inventory/metadata/vars/inference_genai_gateway.yml index 614f5e3e..568f7f84 100644 --- a/core/inventory/metadata/vars/inference_genai_gateway.yml +++ b/core/inventory/metadata/vars/inference_genai_gateway.yml @@ -1,4 +1,4 @@ -# 
Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 helm_charts_base_genai: "{{ lookup('env', 'PWD') }}/helm-charts/genai-gateway" helm_charts_base_genai_trace: "{{ lookup('env', 'PWD') }}/helm-charts/genai-gateway-trace" \ No newline at end of file diff --git a/core/inventory/metadata/vars/inference_keycloak_apisix.yml b/core/inventory/metadata/vars/inference_keycloak_apisix.yml index 169bf7c9..ee6d5bdd 100644 --- a/core/inventory/metadata/vars/inference_keycloak_apisix.yml +++ b/core/inventory/metadata/vars/inference_keycloak_apisix.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 ingress_file: "all_models_apisix_ingres_nginx.yaml" keycloak_url: "https://{{ secret_name }}" diff --git a/core/inventory/metadata/vars/inference_llm_models.yml b/core/inventory/metadata/vars/inference_llm_models.yml index 2384ed59..51694ea4 100644 --- a/core/inventory/metadata/vars/inference_llm_models.yml +++ b/core/inventory/metadata/vars/inference_llm_models.yml @@ -1,6 +1,6 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -ansible_python_interpreter: /usr/bin/python3 +ansible_python_interpreter: "{{ lookup('env', 'ANSIBLE_PYTHON_INTERPRETER') or '/usr/bin/python3' }}" helm_charts_base: "{{ lookup('env', 'PWD') }}/helm-charts" remote_home_dir: "{{ lookup('env', 'PWD') }}/scripts" remote_helm_charts_base: "/tmp/helm-charts" @@ -24,4 +24,7 @@ hugging_face_model_remove_name: 'false' balloon_policy_cpu: 'None' gaudi_values_file: "{{ remote_helm_charts_base }}/vllm/gaudi-values.yaml" huggingface_tensor_parellel_size: 'false' -vllm_metrics_enabled: 'false' \ No newline at end of file +vllm_metrics_enabled: 'false' +# Total CPUs reserved across all NUMA nodes for system components (keycloak, apisix, observability) +# This fixed total is 
distributed evenly across NUMA nodes +nri_total_reserved_cpus: 8 diff --git a/core/inventory/metadata/vars/inference_observability.yml b/core/inventory/metadata/vars/inference_observability.yml index 53691be5..e96af7e8 100644 --- a/core/inventory/metadata/vars/inference_observability.yml +++ b/core/inventory/metadata/vars/inference_observability.yml @@ -1,3 +1,3 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 deploy_logging: "no" \ No newline at end of file diff --git a/core/lib/add-node.sh b/core/lib/add-node.sh index 819241ae..ae9e01fb 100644 --- a/core/lib/add-node.sh +++ b/core/lib/add-node.sh @@ -1,5 +1,5 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/core/lib/cluster/config/cluster-config-init.sh b/core/lib/cluster/config/cluster-config-init.sh index 7167601d..2e5564e2 100644 --- a/core/lib/cluster/config/cluster-config-init.sh +++ b/core/lib/cluster/config/cluster-config-init.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 deploy_cluster_config_playbook() { diff --git a/core/lib/cluster/config/label-nodes.sh b/core/lib/cluster/config/label-nodes.sh index 7c83ccff..cf2269b6 100644 --- a/core/lib/cluster/config/label-nodes.sh +++ b/core/lib/cluster/config/label-nodes.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 run_label_nodes_playbook() { diff --git a/core/lib/cluster/config/setup-user-cluster-config.sh b/core/lib/cluster/config/setup-user-cluster-config.sh index 3c6c7067..6f5df92e 100644 --- a/core/lib/cluster/config/setup-user-cluster-config.sh +++ b/core/lib/cluster/config/setup-user-cluster-config.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 
2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 run_kube_conf_copy_playbook() { diff --git a/core/lib/cluster/deployment/cluster-purge.sh b/core/lib/cluster/deployment/cluster-purge.sh index f377f9c3..95a3f8c6 100644 --- a/core/lib/cluster/deployment/cluster-purge.sh +++ b/core/lib/cluster/deployment/cluster-purge.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 run_reset_playbook() { diff --git a/core/lib/cluster/deployment/cluster-update.sh b/core/lib/cluster/deployment/cluster-update.sh index f4f69ee7..ad8d60cb 100644 --- a/core/lib/cluster/deployment/cluster-update.sh +++ b/core/lib/cluster/deployment/cluster-update.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 update_cluster() { diff --git a/core/lib/cluster/deployment/fresh-install.sh b/core/lib/cluster/deployment/fresh-install.sh index f4901f10..e4c78c79 100644 --- a/core/lib/cluster/deployment/fresh-install.sh +++ b/core/lib/cluster/deployment/fresh-install.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -19,7 +19,7 @@ fresh_installation() { echo "Deployment configuration: $deploy_kubernetes_fresh" - if [[ "$deploy_kubernetes_fresh" == "no" && "$deploy_habana_ai_operator" == "no" && "$deploy_ingress_controller" == "no" && "$deploy_keycloak" == "no" && "$deploy_apisix" == "no" && "$deploy_llm_models" == "no" && "$deploy_observability" == "no" && "$deploy_genai_gateway" == "no" && "$deploy_istio" == "no" && "$deploy_ceph" == "no" && "$uninstall_ceph" == "no" && "$deploy_nri_balloon_policy" == "no" ]]; then + if [[ "$deploy_kubernetes_fresh" == "no" && "$deploy_habana_ai_operator" == "no" && "$deploy_ingress_controller" == "no" && "$deploy_keycloak" == "no" && "$deploy_apisix" == "no" && 
"$deploy_llm_models" == "no" && "$deploy_observability" == "no" && "$deploy_genai_gateway" == "no" && "$deploy_istio" == "no" && "$deploy_ceph" == "no" && "$uninstall_ceph" == "no" && "$deploy_nri_balloon_policy" == "no" && "$deploy_agenticai_plugin" == "no" ]]; then # Check if all deployment steps are set to "no" after getting user input echo "No installation or deployment steps selected. Skipping setup_initial_env..." @@ -127,6 +127,28 @@ fresh_installation() { else echo "Skipping Observability deployment..." fi + # Deploy Plugins + # -------------- + # Plugins are deployed after core infrastructure is ready + + if [[ "$deploy_agenticai_plugin" == "yes" ]]; then + echo "Deploying Agentic AI Plugin..." + ansible-playbook -i "${INVENTORY_PATH}" ../../plugins/agenticai/playbooks/deploy-agenticai-plugin.yml \ + --extra-vars "cluster_url=${cluster_url} \ + cert_file=${cert_file} \ + key_file=${key_file} \ + kubernetes_platform=${kubernetes_platform}" \ + --vault-password-file "$vault_pass_file" + if [ $? -eq 0 ]; then + echo "Agentic AI Plugin deployed successfully." + else + echo "Failed to deploy Agentic AI Plugin. Exiting!." + exit 1 + fi + else + echo "Skipping Agentic AI Plugin deployment..." + fi + if [[ "$deploy_istio" == "yes" ]]; then echo "Deploying Istio..." execute_and_check "Deploying Istio..." 
deploy_istio_playbook "$@" \ diff --git a/core/lib/cluster/drv-fw-update.sh b/core/lib/cluster/drv-fw-update.sh index 96cc8e13..44367b9c 100644 --- a/core/lib/cluster/drv-fw-update.sh +++ b/core/lib/cluster/drv-fw-update.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/core/lib/cluster/nodes/add-node.sh b/core/lib/cluster/nodes/add-node.sh index 8ce9dd5a..6e976731 100644 --- a/core/lib/cluster/nodes/add-node.sh +++ b/core/lib/cluster/nodes/add-node.sh @@ -1,5 +1,5 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 add_inference_nodes_playbook() { diff --git a/core/lib/cluster/nodes/remove-node.sh b/core/lib/cluster/nodes/remove-node.sh index c385ab7b..675c4439 100644 --- a/core/lib/cluster/nodes/remove-node.sh +++ b/core/lib/cluster/nodes/remove-node.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 remove_inference_nodes_playbook() { diff --git a/core/lib/cluster/state/cluster-state-check.sh b/core/lib/cluster/state/cluster-state-check.sh index 90fafd22..4fb6ea9f 100644 --- a/core/lib/cluster/state/cluster-state-check.sh +++ b/core/lib/cluster/state/cluster-state-check.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 check_cluster_state() { diff --git a/core/lib/components/genai-gateway-controller.sh b/core/lib/components/genai-gateway-controller.sh index 490d666d..f0ef44d5 100644 --- a/core/lib/components/genai-gateway-controller.sh +++ b/core/lib/components/genai-gateway-controller.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 run_genai_gateway_playbook() { diff --git 
a/core/lib/components/ingress-controller.sh b/core/lib/components/ingress-controller.sh index 0c7f114d..fc15d7a4 100644 --- a/core/lib/components/ingress-controller.sh +++ b/core/lib/components/ingress-controller.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 run_ingress_nginx_playbook() { diff --git a/core/lib/components/intel-base-operator.sh b/core/lib/components/intel-base-operator.sh index 6fab02af..e7bcbbda 100644 --- a/core/lib/components/intel-base-operator.sh +++ b/core/lib/components/intel-base-operator.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 run_deploy_habana_ai_operator_playbook() { diff --git a/core/lib/components/keycloak-controller.sh b/core/lib/components/keycloak-controller.sh index 815e5392..fc3e786c 100644 --- a/core/lib/components/keycloak-controller.sh +++ b/core/lib/components/keycloak-controller.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 run_keycloak_playbook() { diff --git a/core/lib/components/kubernetes-setup.sh b/core/lib/components/kubernetes-setup.sh index 9703acd5..d7530651 100644 --- a/core/lib/components/kubernetes-setup.sh +++ b/core/lib/components/kubernetes-setup.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 install_kubernetes() { diff --git a/core/lib/components/observability-controller.sh b/core/lib/components/observability-controller.sh index eb3f3c4b..bf37126c 100644 --- a/core/lib/components/observability-controller.sh +++ b/core/lib/components/observability-controller.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 
deploy_observability_playbook() { diff --git a/core/lib/components/service-mesh/install-istio.sh b/core/lib/components/service-mesh/install-istio.sh index cdc648d6..e0694651 100644 --- a/core/lib/components/service-mesh/install-istio.sh +++ b/core/lib/components/service-mesh/install-istio.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 deploy_istio_playbook() { diff --git a/core/lib/components/storage/install-ceph-cluster.sh b/core/lib/components/storage/install-ceph-cluster.sh index 5d38b228..a8f3c9ca 100644 --- a/core/lib/components/storage/install-ceph-cluster.sh +++ b/core/lib/components/storage/install-ceph-cluster.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 deploy_ceph_cluster() { diff --git a/core/lib/components/storage/uninstall-ceph-cluster.sh b/core/lib/components/storage/uninstall-ceph-cluster.sh index 616c7301..cc280c37 100644 --- a/core/lib/components/storage/uninstall-ceph-cluster.sh +++ b/core/lib/components/storage/uninstall-ceph-cluster.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 uninstall_ceph_cluster() { diff --git a/core/lib/models/install-model-hf.sh b/core/lib/models/install-model-hf.sh index 0ee866a4..08a2b146 100644 --- a/core/lib/models/install-model-hf.sh +++ b/core/lib/models/install-model-hf.sh @@ -1,5 +1,5 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 deploy_from_huggingface() { diff --git a/core/lib/models/install-model.sh b/core/lib/models/install-model.sh index 49d3f2db..40321f8d 100644 --- a/core/lib/models/install-model.sh +++ b/core/lib/models/install-model.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 
Intel Corporation # SPDX-License-Identifier: Apache-2.0 deploy_inference_llm_models_playbook() { @@ -108,7 +108,23 @@ add_model() { setup_bastion "$@" INVENTORY_PATH=$brownfield_deployment_host_file fi - invoke_prereq_workflows "$@" + invoke_prereq_workflows "$@" + + # Deploy NRI CPU Balloons for CPU deployments (after all infrastructure, before models) + if [[ "$deploy_nri_balloon_policy" == "yes" ]]; then + # Ensure this is a CPU deployment + if [[ "$cpu_or_gpu" != "c" ]]; then + echo "${RED}Error: NRI Balloon Policy can only be deployed for CPU deployments (cpu_or_gpu='c')${NC}" + echo "${RED}Current cpu_or_gpu setting: '$cpu_or_gpu'${NC}" + echo "${RED}Please set cpu_or_gpu to 'c' or disable NRI balloon policy deployment. Exiting!${NC}" + exit 1 + fi + execute_and_check "Deploying CPU Optimization (NRI Balloons & Topology Detection)..." deploy_nri_balloons_playbook "$@" \ + "CPU optimization deployed successfully." \ + "Failed to deploy CPU optimization. Exiting!." + else + echo "Skipping CPU optimization deployment..." + fi execute_and_check "Deploying Inference LLM Models..." deploy_inference_llm_models_playbook "$@" \ "Inference LLM Model is deployed successfully." \ "Failed to deploy Inference LLM Model Exiting!." @@ -125,4 +141,4 @@ add_model() { echo "Please refer to this comprehensive guide for detailed instructions." 
echo "" fi -} \ No newline at end of file +} diff --git a/core/lib/models/list-model.sh b/core/lib/models/list-model.sh index 2e9d71f6..7cf95534 100644 --- a/core/lib/models/list-model.sh +++ b/core/lib/models/list-model.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 list_inference_llm_models_playbook() { diff --git a/core/lib/models/model-selection.sh b/core/lib/models/model-selection.sh index 69d8a531..f4248c35 100644 --- a/core/lib/models/model-selection.sh +++ b/core/lib/models/model-selection.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 model_selection(){ diff --git a/core/lib/models/uninstall-model-hf.sh b/core/lib/models/uninstall-model-hf.sh index 8fdafaa5..f389d6e3 100644 --- a/core/lib/models/uninstall-model-hf.sh +++ b/core/lib/models/uninstall-model-hf.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/core/lib/models/uninstall-model.sh b/core/lib/models/uninstall-model.sh index a86fc1e8..13d16542 100644 --- a/core/lib/models/uninstall-model.sh +++ b/core/lib/models/uninstall-model.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 remove_inference_llm_models_playbook() { diff --git a/core/lib/system/config-vars.sh b/core/lib/system/config-vars.sh index f811565d..eb27bc7b 100644 --- a/core/lib/system/config-vars.sh +++ b/core/lib/system/config-vars.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 HOMEDIR="$(pwd)" diff --git a/core/lib/system/execute-and-check.sh b/core/lib/system/execute-and-check.sh index 47b5d6b1..265b9f0b 100644 --- 
a/core/lib/system/execute-and-check.sh +++ b/core/lib/system/execute-and-check.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 execute_and_check() { diff --git a/core/lib/system/precheck/prereq-check.sh b/core/lib/system/precheck/prereq-check.sh index be80cebc..31122211 100644 --- a/core/lib/system/precheck/prereq-check.sh +++ b/core/lib/system/precheck/prereq-check.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 run_system_prerequisites_check() { diff --git a/core/lib/system/precheck/read-config-file.sh b/core/lib/system/precheck/read-config-file.sh index aece9370..45c6baf1 100644 --- a/core/lib/system/precheck/read-config-file.sh +++ b/core/lib/system/precheck/read-config-file.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 read_config_file() { diff --git a/core/lib/system/precheck/readiness-check.sh b/core/lib/system/precheck/readiness-check.sh index 4a5cf5c0..0cdf4f5b 100644 --- a/core/lib/system/precheck/readiness-check.sh +++ b/core/lib/system/precheck/readiness-check.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 run_infrastructure_readiness_check() { diff --git a/core/lib/system/setup-env.sh b/core/lib/system/setup-env.sh index e53339a8..d92caf36 100644 --- a/core/lib/system/setup-env.sh +++ b/core/lib/system/setup-env.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 setup_initial_env() { @@ -70,7 +70,7 @@ setup_initial_env() { else echo "Virtual environment activated successfully. 
Path: $VIRTUAL_ENV" fi - + export PIP_BREAK_SYSTEM_PACKAGES=1 $VENVDIR/bin/python3 -m pip install --upgrade pip $VENVDIR/bin/python3 -m pip install -U -r requirements.txt @@ -180,4 +180,4 @@ invoke_prereq_workflows() { install_ansible_collection() { echo "Installing community.general collection..." ansible-galaxy collection install community.general -} \ No newline at end of file +} diff --git a/core/lib/user-menu/parse-user-prompts.sh b/core/lib/user-menu/parse-user-prompts.sh index 2d3e5ad6..0928bdf2 100644 --- a/core/lib/user-menu/parse-user-prompts.sh +++ b/core/lib/user-menu/parse-user-prompts.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 parse_arguments() { diff --git a/core/lib/user-menu/user-menu.sh b/core/lib/user-menu/user-menu.sh index 64e9be43..15f1d199 100644 --- a/core/lib/user-menu/user-menu.sh +++ b/core/lib/user-menu/user-menu.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 update_drivers_and_firmware() { diff --git a/core/lib/xeon/ballon-policy.sh b/core/lib/xeon/ballon-policy.sh index 5cd57f5d..69069fa6 100644 --- a/core/lib/xeon/ballon-policy.sh +++ b/core/lib/xeon/ballon-policy.sh @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 deploy_nri_balloons_playbook() { diff --git a/core/playbooks/deploy-ceph-storage.yml b/core/playbooks/deploy-ceph-storage.yml index a3e94c81..2c3959ad 100644 --- a/core/playbooks/deploy-ceph-storage.yml +++ b/core/playbooks/deploy-ceph-storage.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Rook Ceph Cluster Minimal Setup diff --git a/core/playbooks/deploy-cluster-config.yml b/core/playbooks/deploy-cluster-config.yml 
index f8ebdd64..bcd66d3f 100644 --- a/core/playbooks/deploy-cluster-config.yml +++ b/core/playbooks/deploy-cluster-config.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- #- import_playbook: "{{ lookup('env', 'PWD') }}/config/inference_env.yml" diff --git a/core/playbooks/deploy-cpu-optimization.yml b/core/playbooks/deploy-cpu-optimization.yml index bab01930..078140f3 100644 --- a/core/playbooks/deploy-cpu-optimization.yml +++ b/core/playbooks/deploy-cpu-optimization.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- diff --git a/core/playbooks/deploy-gaudi-firmware-driver.yml b/core/playbooks/deploy-gaudi-firmware-driver.yml index f98a3266..d962082a 100644 --- a/core/playbooks/deploy-gaudi-firmware-driver.yml +++ b/core/playbooks/deploy-gaudi-firmware-driver.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Deploy Gaudi Firmware Driver diff --git a/core/playbooks/deploy-genai-gateway.yml b/core/playbooks/deploy-genai-gateway.yml index ec6d58a0..97f0c53b 100644 --- a/core/playbooks/deploy-genai-gateway.yml +++ b/core/playbooks/deploy-genai-gateway.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Deploy GenAI Gateway @@ -186,7 +186,7 @@ annotations: alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_301"}}' alb.ingress.kubernetes.io/certificate-arn: "{{ aws_certificate_arn | default('') }}" - alb.ingress.kubernetes.io/group.name: eks-genai-trace + alb.ingress.kubernetes.io/group.name: ei-eks alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 
443}]' alb.ingress.kubernetes.io/reconcile: now alb.ingress.kubernetes.io/scheme: internet-facing @@ -228,6 +228,7 @@ --set langfuse.ingress.className={{ 'alb' if kubernetes_platform == 'eks' else 'nginx' }} --set langfuse.ingress.tls.enabled=true --set langfuse.ingress.hosts[0].host=trace-{{ secret_name }} + --set langfuse.ingress.hosts[0].paths[0].pathType=Prefix --set langfuse.ingress.tls.secretName=trace-{{ secret_name }} --set langfuse.nextauth.url=https://trace-{{ secret_name }} --set langfuse.route.enabled=false diff --git a/core/playbooks/deploy-habana-ai-operator.yml b/core/playbooks/deploy-habana-ai-operator.yml index 5065f9b4..8dc26935 100644 --- a/core/playbooks/deploy-habana-ai-operator.yml +++ b/core/playbooks/deploy-habana-ai-operator.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Deploy Habana AI Operator Namespace and Labels diff --git a/core/playbooks/deploy-inference-models.yml b/core/playbooks/deploy-inference-models.yml index fd5a37a6..122e7ddc 100644 --- a/core/playbooks/deploy-inference-models.yml +++ b/core/playbooks/deploy-inference-models.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Deploy/Remove Inference LLM Models @@ -36,8 +36,6 @@ - name: Set default model parallelism configuration for CPU deployments ansible.builtin.set_fact: model_parallelism_config: - tensor_parallel_size: "{{ model_parallelism_config.tensor_parallel_size | default(1) }}" - pipeline_parallel_size: "{{ model_parallelism_config.pipeline_parallel_size | default(1) }}" strategy: "{{ model_parallelism_config.strategy | default('tensor_parallel') }}" when: - cpu_playbook == 'true' @@ -276,67 +274,30 @@ is_single_node: "{{ cpu_topology_config.total_sockets == 1 and cpu_topology_config.numa_nodes_per_socket == 1 }}" reserved_cpus_percentage: "{{ 
reserved_cpus_percentage | default(18) | int }}" reserved_cpus: >- - {%- set total_cpus = cpu_topology_config.cpus_per_socket | int -%} - {%- set percentage = reserved_cpus_percentage | default(18) | int -%} - {%- set calculated_reserved = ((total_cpus * percentage) / 100) | round(0, 'ceil') | int -%} - {%- set min_reserved = 2 -%} - {%- set max_reserved = total_cpus // 2 -%} - {%- if calculated_reserved < min_reserved -%} - {{ min_reserved }} - {%- elif calculated_reserved > max_reserved -%} - {{ max_reserved }} - {%- else -%} - {{ calculated_reserved }} - {%- endif -%} + {%- set reserved_cpu_count = nri_reserved_cpu_list.split(',') | length -%} + {{ reserved_cpu_count }} balloon_cpus: >- - {%- set total_cpus = cpu_topology_config.cpus_per_socket | int -%} - {%- set percentage = reserved_cpus_percentage | default(18) | int -%} - {%- set calculated_reserved = ((total_cpus * percentage) / 100) | round(0, 'ceil') | int -%} - {%- set min_reserved = 2 -%} - {%- set max_reserved = total_cpus // 2 -%} - {%- if calculated_reserved < min_reserved -%} - {%- set final_reserved = min_reserved -%} - {%- elif calculated_reserved > max_reserved -%} - {%- set final_reserved = max_reserved -%} - {%- else -%} - {%- set final_reserved = calculated_reserved -%} - {%- endif -%} - {%- if cpu_topology_config.total_sockets == 1 and cpu_topology_config.numa_nodes_per_socket == 1 -%} - {{ ((total_cpus - final_reserved) / 2) | int }} - {%- else -%} - {{ total_cpus - final_reserved }} - {%- endif -%} - workload_cpus: >- - {%- set total_cpus = cpu_topology_config.cpus_per_socket | int -%} - {%- set percentage = reserved_cpus_percentage | default(18) | int -%} - {%- set calculated_reserved = ((total_cpus * percentage) / 100) | round(0, 'ceil') | int -%} - {%- set min_reserved = 2 -%} - {%- set max_reserved = total_cpus // 2 -%} - {%- if calculated_reserved < min_reserved -%} - {%- set final_reserved = min_reserved -%} - {%- elif calculated_reserved > max_reserved -%} - {%- set final_reserved = 
max_reserved -%} - {%- else -%} - {%- set final_reserved = calculated_reserved -%} - {%- endif -%} - {{ total_cpus - final_reserved }} + {%- set total_cpus = (cpu_topology_config.cpus_per_socket | int) * (cpu_topology_config.total_sockets | int) -%} + {%- set reserved_cpu_count = nri_reserved_cpu_list.split(',') | length -%} + {%- if reserved_cpu_count >= total_cpus -%}{%- set reserved_cpu_count = total_cpus -%}{%- endif -%} + {{ total_cpus - reserved_cpu_count }} + workload_cpus: "{{ cpu_topology_config.workload_cpus | int }}" - name: Calculate parallelism configuration ansible.builtin.set_fact: parallelism_config: tensor_parallel_size: >- - {%- set numa_per_socket = cpu_topology_config.numa_nodes_per_socket | int -%} - {%- if numa_per_socket in [2, 4] -%} - {{ numa_per_socket }} - {%- elif numa_per_socket in [3] -%} + {%- set total_numa = (cpu_topology_config.total_sockets | int) * (cpu_topology_config.numa_nodes_per_socket | int) -%} + {%- if total_numa in [1, 2, 4] -%} + {{ total_numa }} + {%- elif total_numa in [3] -%} 2 - {%- elif numa_per_socket in [6] -%} + {%- elif total_numa in [6] -%} 4 {%- else -%} 1 {%- endif -%} strategy: >- - {%- set numa_per_socket = cpu_topology_config.numa_nodes_per_socket | int -%} tensor_parallel - name: Combine topology and parallelism configs @@ -354,7 +315,6 @@ - "NUMA Nodes per Socket: {{ optimal_balloon_config.numa_nodes_per_socket }}" - "CPUs per Socket: {{ optimal_balloon_config.cpus_per_socket }}" - "Is Single Node: {{ optimal_balloon_config.is_single_node }}" - - "Reserved CPUs Percentage: {{ optimal_balloon_config.reserved_cpus_percentage }}%" - "Reserved CPUs Count: {{ optimal_balloon_config.reserved_cpus }} ({{ ((optimal_balloon_config.reserved_cpus | int / optimal_balloon_config.cpus_per_socket | int) * 100) | round(1) }}% actual)" - "Workload CPUs (for xeon-values.yaml): {{ optimal_balloon_config.workload_cpus }}" - "Balloon CPUs: {{ optimal_balloon_config.balloon_cpus }}" @@ -403,17 +363,50 @@ when: usable_mem_var 
is string and socket0_memory_gb | int == 0 ignore_errors: true - - name: Set optimal memory with minimum 8GB and maximum 256GB + - name: Calculate minimum memory based on tensor parallel size + ansible.builtin.set_fact: + min_memory_for_tp: >- + {%- set tp = optimal_balloon_config.tensor_parallel_size | default(1) | int -%} + {%- if tp >= 4 -%} + 256 + {%- elif tp >= 2 -%} + 128 + {%- else -%} + 128 + {%- endif -%} + + - name: Set optimal memory based on TP requirements and available memory ansible.builtin.set_fact: - optimal_memory_gb: "{% if socket0_memory_gb | int < 8 %}8{% elif socket0_memory_gb | int > 256 %}256{% else %}{{ socket0_memory_gb | int }}{% endif %}" + optimal_memory_gb: >- + {%- set available = socket0_memory_gb | int -%} + {%- set min_required = min_memory_for_tp | int -%} + {%- set max_limit = 256 -%} + {%- if available < min_required -%} + {{ min_required }} + {%- elif available > max_limit -%} + {{ max_limit }} + {%- else -%} + {{ available }} + {%- endif -%} - name: Final memory calculation ansible.builtin.debug: msg: | === Memory Calculation Summary === + Tensor Parallel Size: {{ optimal_balloon_config.tensor_parallel_size | default(1) }} + Minimum memory required for TP: {{ min_memory_for_tp }} GB Socket 0 memory available: {{ socket0_memory_gb }} GB Final memory for deployment: {{ optimal_memory_gb }} GB - Min threshold: 8 GB, Max threshold: 256 GB + Memory allocation: {{ 'Using minimum required' if (socket0_memory_gb | int) < (min_memory_for_tp | int) else 'Using available memory' if (socket0_memory_gb | int) <= 256 else 'Capped at 256GB max' }} + + - name: Apply optimized TP/PP defaults (unless user overrides) + ansible.builtin.set_fact: + tensor_parallel_size: "{{ tensor_parallel_size | default(optimal_balloon_config.get('tensor_parallel_size', 1)) | int }}" + pipeline_parallel_size: "{{ pipeline_parallel_size | default(optimal_balloon_config.get('pipeline_parallel_size', 1)) | int }}" + when: + - cpu_playbook == 'true' + tags: always + 
run_once: true - name: Display CPU optimization helm charts update completion debug: @@ -738,6 +731,9 @@ {% if cpu_playbook == 'true' %} --values {{ remote_helm_charts_base }}/vllm/xeon-values.yaml --set cpu_balloon_annotation="vllm-balloon" + --set podLabels.name="vllm" + --set cpu="{{ optimal_balloon_config.workload_cpus | default(8) }}" + --set memory="{{ optimal_memory_gb | default(8) }}Gi" --set tensor_parallel_size={{ optimal_balloon_config.tensor_parallel_size | default(1) }} --set pipeline_parallel_size={{ optimal_balloon_config.pipeline_parallel_size | default(1) }} {% elif gaudi_deployment|lower == "true" %} @@ -1991,8 +1987,8 @@ --set podLabels.name="vllm" --set cpu="{{ optimal_balloon_config.workload_cpus | default(8) }}" --set memory="{{ optimal_memory_gb | default(8) }}Gi" - --set tensor_parallel_size={{ model_parallelism_config.tensor_parallel_size | default(1) }} - --set pipeline_parallel_size={{ model_parallelism_config.pipeline_parallel_size | default(1) }} + --set tensor_parallel_size={{ tensor_parallel_size | default(1) }} + --set pipeline_parallel_size={{ pipeline_parallel_size | default(1) }} {% endif %} {% if apisix_enabled %} --set apisix.enabled={{ apisix_enabled }} @@ -2094,8 +2090,8 @@ --set podLabels.name="vllm" --set cpu="{{ optimal_balloon_config.workload_cpus | default(8) }}" --set memory="{{ optimal_memory_gb | default(8) }}Gi" - --set tensor_parallel_size={{ model_parallelism_config.tensor_parallel_size | default(1) }} - --set pipeline_parallel_size={{ model_parallelism_config.pipeline_parallel_size | default(1) }} + --set tensor_parallel_size={{ tensor_parallel_size | default(1) }} + --set pipeline_parallel_size={{ pipeline_parallel_size | default(1) }} {% endif %} {% if apisix_enabled %} --set apisix.enabled={{ apisix_enabled }} @@ -2196,8 +2192,8 @@ --set podLabels.name="vllm" --set cpu="{{ optimal_balloon_config.workload_cpus | default(8) }}" --set memory="{{ optimal_memory_gb | default(8) }}Gi" - --set
tensor_parallel_size={{ model_parallelism_config.tensor_parallel_size | default(1) }} - --set pipeline_parallel_size={{ model_parallelism_config.pipeline_parallel_size | default(1) }} + --set tensor_parallel_size={{ tensor_parallel_size | default(1) }} + --set pipeline_parallel_size={{ pipeline_parallel_size | default(1) }} {% endif %} {% if apisix_enabled %} --set apisix.enabled={{ apisix_enabled }} @@ -2301,8 +2297,8 @@ --set podLabels.name="vllm" --set cpu="{{ optimal_balloon_config.workload_cpus | default(8) }}" --set memory="{{ optimal_memory_gb | default(8) }}Gi" - --set tensor_parallel_size={{ model_parallelism_config.tensor_parallel_size | default(1) }} - --set pipeline_parallel_size={{ model_parallelism_config.pipeline_parallel_size | default(1) }} + --set tensor_parallel_size={{ tensor_parallel_size | default(1) }} + --set pipeline_parallel_size={{ pipeline_parallel_size | default(1) }} {% endif %} {% if apisix_enabled %} --set apisix.enabled={{ apisix_enabled }} @@ -2411,8 +2407,8 @@ --set podLabels.name="vllm" --set cpu="{{ optimal_balloon_config.workload_cpus | default(8) }}" --set memory="{{ optimal_memory_gb | default(8) }}Gi" - --set tensor_parallel_size={{ model_parallelism_config.tensor_parallel_size | default(1) }} - --set pipeline_parallel_size={{ model_parallelism_config.pipeline_parallel_size | default(1) }} + --set tensor_parallel_size={{ tensor_parallel_size | default(1) }} + --set pipeline_parallel_size={{ pipeline_parallel_size | default(1) }} {% endif %} {% if apisix_enabled %} --set apisix.enabled={{ apisix_enabled }} @@ -2516,8 +2512,8 @@ --set podLabels.name="vllm" --set cpu="{{ optimal_balloon_config.workload_cpus | default(8) }}" --set memory="{{ optimal_memory_gb | default(8) }}Gi" - --set tensor_parallel_size={{ model_parallelism_config.tensor_parallel_size | default(1) }} - --set pipeline_parallel_size={{ model_parallelism_config.pipeline_parallel_size | default(1) }} + --set tensor_parallel_size={{ tensor_parallel_size | 
default(1) }} + --set pipeline_parallel_size={{ pipeline_parallel_size | default(1) }} {% endif %} {% if apisix_enabled %} --set apisix.enabled={{ apisix_enabled }} @@ -2592,4 +2588,4 @@ tags: always ansible.builtin.file: path: "{{ remote_helm_charts_base }}" - state: absent + state: absent \ No newline at end of file diff --git a/core/playbooks/deploy-ingress-controller.yml b/core/playbooks/deploy-ingress-controller.yml index 382aa486..5e0fd648 100644 --- a/core/playbooks/deploy-ingress-controller.yml +++ b/core/playbooks/deploy-ingress-controller.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Deploy Ingress NGINX using Helm @@ -25,13 +25,8 @@ ansible.builtin.shell: helm repo update when: helm_repo_list.stdout != "" - - name: Retrieving the Number of Inference Infrastructure Nodes - ansible.builtin.shell: kubectl get nodes --selector=role=inference-infra -o jsonpath='{.items[*].metadata.name}' | wc -w - register: inference_node_count - changed_when: false - - - name: Retrieving the Number of Infrastructure Nodes - ansible.builtin.shell: kubectl get nodes --selector=role=infra -o jsonpath='{.items[*].metadata.name}' | wc -w + - name: Retrieving the Number of Infrastructure Nodes (ei-infra-eligible) + ansible.builtin.shell: kubectl get nodes --selector=ei-infra-eligible=true -o jsonpath='{.items[*].metadata.name}' | wc -w register: inference_node_count changed_when: false @@ -85,9 +80,9 @@ requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-infra-eligible operator: In - values: ["infra"] + values: ["true"] - matchExpressions: - key: node-role.kubernetes.io/control-plane operator: Exists diff --git a/core/playbooks/deploy-istio-openshift.yml b/core/playbooks/deploy-istio-openshift.yml index fde91183..9841d9d0 100644 --- a/core/playbooks/deploy-istio-openshift.yml +++ 
b/core/playbooks/deploy-istio-openshift.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Install and configure Istio (OpenShift Service Mesh) diff --git a/core/playbooks/deploy-istio.yml b/core/playbooks/deploy-istio.yml index 68729655..2c4aff9c 100644 --- a/core/playbooks/deploy-istio.yml +++ b/core/playbooks/deploy-istio.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Install and configure Istio in Ambient mode diff --git a/core/playbooks/deploy-keycloak-controller.yml b/core/playbooks/deploy-keycloak-controller.yml index a0a7cedf..de0220c6 100644 --- a/core/playbooks/deploy-keycloak-controller.yml +++ b/core/playbooks/deploy-keycloak-controller.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Deploy/Remove Keycloak controller diff --git a/core/playbooks/deploy-keycloak-service.yml b/core/playbooks/deploy-keycloak-service.yml index 3656dcd0..30219569 100644 --- a/core/playbooks/deploy-keycloak-service.yml +++ b/core/playbooks/deploy-keycloak-service.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Deploy Keycloak controller diff --git a/core/playbooks/deploy-keycloak-tls-cert.yml b/core/playbooks/deploy-keycloak-tls-cert.yml index f865c834..51ef0325 100644 --- a/core/playbooks/deploy-keycloak-tls-cert.yml +++ b/core/playbooks/deploy-keycloak-tls-cert.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Create Keycloak TLS secret @@ -55,16 +55,16 @@ - name: Display the output debug: msg: "Secret {{ secret_name }} created." 
- - name: Retrieve number of Inference Infrastructure Nodes - ansible.builtin.shell: kubectl get nodes --selector=role=inference-infra --no-headers | wc -l + - name: Retrieve number of Infrastructure Nodes (ei-infra-eligible) + ansible.builtin.shell: kubectl get nodes --selector=ei-infra-eligible=true --no-headers | wc -l register: inference_node_count changed_when: false - - name: Set Keycloak Replicas based upon Inference Infra Nodes + - name: Set Keycloak Replicas based upon Infrastructure Nodes set_fact: inference_infra_replica_count: "{{ inference_node_count.stdout | int if inference_node_count.stdout | int > 0 else 1 }}" - - name: Display the number of Inference Infrastructure Nodes + - name: Display the number of Infrastructure Nodes debug: - msg: "Number of inference-infra node configured: {{ inference_infra_replica_count }}" + msg: "Number of ei-infra-eligible nodes configured: {{ inference_infra_replica_count }}" run_once: true when: deploy_keycloak == "yes" - name: Delete genai-gateway-ingress resource @@ -102,32 +102,36 @@ persistence: enabled: true size: 8Gi + tolerations: + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: ei-infra-eligible + operator: In + values: ["true"] volumePermissions: enabled: true image: repository: bitnamilegacy/os-shell - tag: 12-debian-12-r48 - tolerations: - - key: node-role.kubernetes.io/control-plane - operator: Exists - effect: NoSchedule - - key: node-role.kubernetes.io/master - operator: Exists - effect: NoSchedule - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: role - operator: In - values: ["infra"] - - matchExpressions: - - key: node-role.kubernetes.io/control-plane - operator: Exists + tag: 
12-debian-12-r48 production: true proxy: edge replicaCount: "{{ inference_infra_replica_count | int }}" + resources: + requests: + memory: "2Gi" + cpu: "500m" + limits: + memory: "4Gi" + cpu: "1500m" extraEnvVars: - name: KC_CACHE value: ispn @@ -183,9 +187,9 @@ requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-infra-eligible operator: In - values: ["infra"] + values: ["true"] - matchExpressions: - key: node-role.kubernetes.io/control-plane operator: Exists @@ -291,10 +295,11 @@ annotations: alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_301"}}' alb.ingress.kubernetes.io/certificate-arn: "{{ aws_certificate_arn | default('') }}" - alb.ingress.kubernetes.io/group.name: keycloak-apisix + alb.ingress.kubernetes.io/group.name: ei-eks alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]' alb.ingress.kubernetes.io/scheme: internet-facing alb.ingress.kubernetes.io/target-type: ip + alb.ingress.kubernetes.io/group.order: '3' spec: ingressClassName: alb rules: diff --git a/core/playbooks/deploy-observability-openshift.yml b/core/playbooks/deploy-observability-openshift.yml index bd435ed7..a559a1e0 100644 --- a/core/playbooks/deploy-observability-openshift.yml +++ b/core/playbooks/deploy-observability-openshift.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Deploy Observability on OpenShift diff --git a/core/playbooks/deploy-observability.yml b/core/playbooks/deploy-observability.yml index ad57d612..c1e5863a 100644 --- a/core/playbooks/deploy-observability.yml +++ b/core/playbooks/deploy-observability.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Deploy Observability @@ 
-179,8 +179,8 @@ annotations: alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_301"}}' alb.ingress.kubernetes.io/certificate-arn: "{{ aws_certificate_arn | default('') }}" - alb.ingress.kubernetes.io/group.name: eks-genai - alb.ingress.kubernetes.io/group.order: '1' + alb.ingress.kubernetes.io/group.name: ei-eks + alb.ingress.kubernetes.io/group.order: '2' alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]' alb.ingress.kubernetes.io/scheme: internet-facing alb.ingress.kubernetes.io/target-type: ip diff --git a/core/playbooks/generate-ceph-values.yml b/core/playbooks/generate-ceph-values.yml index ce10baba..76f64616 100644 --- a/core/playbooks/generate-ceph-values.yml +++ b/core/playbooks/generate-ceph-values.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Generate Ceph cluster-values.yaml from inventory storage nodes diff --git a/core/playbooks/inference-precheck.yml b/core/playbooks/inference-precheck.yml index e404d249..b924ed49 100644 --- a/core/playbooks/inference-precheck.yml +++ b/core/playbooks/inference-precheck.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Run Inference Prerequisites Check diff --git a/core/playbooks/label-nodes.yml b/core/playbooks/label-nodes.yml index b24160bb..bb63e4a8 100644 --- a/core/playbooks/label-nodes.yml +++ b/core/playbooks/label-nodes.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Label Kubernetes Nodes @@ -39,7 +39,7 @@ register: k8s_nodes run_once: true - - name: Label control plane nodes with role=infra + - name: Label control plane nodes with ei-infra-eligible=true 
kubernetes.core.k8s: state: patched kind: Node @@ -47,13 +47,13 @@ definition: metadata: labels: - role: "infra" + ei-infra-eligible: "true" loop: "{{ k8s_nodes.resources }}" when: - item.metadata.name in control_plane_nodes or item.metadata.labels['node-role.kubernetes.io/control-plane'] is defined run_once: true - - name: Label worker nodes with role=inference + - name: Label worker/inference nodes with ei-inference-eligible=true kubernetes.core.k8s: state: patched kind: Node @@ -61,10 +61,10 @@ definition: metadata: labels: - role: "inference" + ei-inference-eligible: "true" loop: "{{ k8s_nodes.resources }}" when: - - item.metadata.name in worker_nodes or (item.metadata.labels['node-role.kubernetes.io/control-plane'] is not defined and item.metadata.name not in control_plane_nodes) + - item.metadata.name in worker_nodes run_once: true - name: Verify node labels @@ -77,6 +77,6 @@ debug: msg: | Node: {{ item.metadata.name }} - Labels: {{ item.metadata.labels | dict2items | selectattr('key', 'match', '^role') | list | items2dict(key_name='key', value_name='value') }} + Labels: {{ item.metadata.labels | dict2items | selectattr('key', 'match', '^ei-') | list | items2dict(key_name='key', value_name='value') }} loop: "{{ labeled_nodes.resources }}" run_once: true diff --git a/core/playbooks/register-model-genai-gateway.yml b/core/playbooks/register-model-genai-gateway.yml index ac800ad2..12f51e24 100644 --- a/core/playbooks/register-model-genai-gateway.yml +++ b/core/playbooks/register-model-genai-gateway.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Create Kubernetes Job for model registration diff --git a/core/playbooks/reset.yml b/core/playbooks/reset.yml index a5d56d55..e1606beb 100644 --- a/core/playbooks/reset.yml +++ b/core/playbooks/reset.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # 
SPDX-License-Identifier: Apache-2.0 --- - name: Common tasks for every playbooks diff --git a/core/playbooks/setup-bastion.yml b/core/playbooks/setup-bastion.yml index 09a67db2..2cf681d9 100644 --- a/core/playbooks/setup-bastion.yml +++ b/core/playbooks/setup-bastion.yml @@ -464,6 +464,1210 @@ when: not kubeconfig_check.stat.exists tags: kubeconfig + ############################################################# + # BROWNFIELD DEPLOYMENT PRECHECKS + ############################################################# + + - name: Run Brownfield Deployment Prechecks + block: + - name: Display precheck start message + debug: + msg: + - "========================================" + - "BROWNFIELD DEPLOYMENT PRECHECKS" + - "========================================" + - "Running comprehensive validation checks for brownfield deployment" + - "This includes OS, storage, ingress, DNS, node labels, and resource checks" + tags: precheck + + # OS and System Prechecks + - name: Gather system facts for OS validation + setup: + gather_subset: + - '!all' + - '!min' + - distribution + - distribution_version + - distribution_major_version + - os_family + - kernel + tags: precheck + + - name: Validate deployment machine OS + assert: + that: + - ansible_distribution == "Ubuntu" + - ansible_distribution_version is version('22.04', '>=') + fail_msg: | + PRECHECK FAILED: Unsupported OS on deployment machine + Current OS: {{ ansible_distribution }} {{ ansible_distribution_version }} + Supported: Ubuntu 22.04 or higher + success_msg: "[PASS] Deployment machine OS validation passed: {{ ansible_distribution }} {{ ansible_distribution_version }}" + ignore_errors: yes + register: os_check + tags: precheck + + - name: Display OS check result + debug: + msg: "{{ '[PASS] OS Check: PASSED' if os_check is succeeded else '[WARN] OS Check: WARNING - Unsupported OS detected' }}" + tags: precheck + + # Cluster Connection and Platform Detection Prechecks + - name: Verify cluster connection is available + block: + 
- name: Test cluster API connectivity + command: kubectl cluster-info --request-timeout=10s + register: cluster_info_check + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + tags: precheck + + - name: Display cluster connectivity result + debug: + msg: "{{ '[PASS] Cluster API Connection: ACCESSIBLE' if cluster_info_check.rc == 0 else '[FAIL] Cluster API Connection: FAILED - Cannot reach cluster' }}" + tags: precheck + + - name: Get cluster version information + shell: kubectl version -o json | jq -r '.serverVersion.gitVersion' + register: k8s_version_check + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: cluster_info_check.rc == 0 + tags: precheck + + - name: Display Kubernetes version + debug: + msg: "[PASS] Kubernetes Version: {{ k8s_version_check.stdout }}" + when: cluster_info_check.rc == 0 and k8s_version_check.rc == 0 + tags: precheck + + when: kubeconfig_check.stat.exists + + # Storage Class Prechecks + - name: Check for available StorageClasses + block: + - name: Get all StorageClasses + command: kubectl get storageclass -o json + register: storage_class_check + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + tags: precheck + + - name: Parse StorageClass information + set_fact: + storage_classes: "{{ (storage_class_check.stdout | from_json)['items'] | map(attribute='metadata.name') | list }}" + default_storage_class: "{{ (storage_class_check.stdout | from_json)['items'] | selectattr('metadata.annotations.storageclass.kubernetes.io/is-default-class', 'defined') | selectattr('metadata.annotations.storageclass.kubernetes.io/is-default-class', 'equalto', 'true') | map(attribute='metadata.name') | first | default('') }}" + when: storage_class_check.rc == 0 + tags: precheck 
+ + - name: Validate StorageClass availability + assert: + that: + - storage_classes is defined + - storage_classes | length > 0 + fail_msg: | + [CRITICAL] No StorageClass found in cluster + At least one StorageClass is required for PVC provisioning + Models require minimum 250GB storage + success_msg: "[PASS] StorageClass Check: {{ storage_classes | length }} StorageClass(es) available" + when: storage_class_check.rc == 0 + ignore_errors: yes + register: storage_class_validation + tags: precheck + + - name: Display StorageClass details + debug: + msg: + - "Available StorageClasses: {{ storage_classes | join(', ') }}" + - "Default StorageClass: {{ default_storage_class if default_storage_class else 'None (must be specified in PVC)' }}" + when: storage_class_check.rc == 0 and storage_classes is defined + tags: precheck + + # Test PVC creation and binding + - name: Create test PVC to validate storage provisioning + kubernetes.core.k8s: + state: present + definition: + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: test-pvc-precheck + namespace: default + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + storageClassName: "{{ default_storage_class if default_storage_class else storage_classes[0] }}" + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: storage_class_check.rc == 0 and storage_classes is defined and storage_classes | length > 0 + register: test_pvc_creation + ignore_errors: yes + tags: precheck + + - name: Create test Pod to consume the PVC (forces binding for WaitForFirstConsumer) + kubernetes.core.k8s: + state: present + definition: + apiVersion: v1 + kind: Pod + metadata: + name: test-pod-precheck + namespace: default + spec: + restartPolicy: Never + volumes: + - name: test-volume + persistentVolumeClaim: + claimName: test-pvc-precheck + containers: + - name: pvc-checker + image: busybox:1.36 + command: ["/bin/sh", "-c", "echo pvc-precheck 
>/data/out && sleep 120"] + volumeMounts: + - name: test-volume + mountPath: /data + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: test_pvc_creation is succeeded + register: test_pod_creation + ignore_errors: yes + tags: precheck + + - name: Wait for test PVC to be bound (max 90 seconds) + kubernetes.core.k8s_info: + api_version: v1 + kind: PersistentVolumeClaim + name: test-pvc-precheck + namespace: default + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + register: test_pvc_status + until: test_pvc_status.resources[0].status.phase == "Bound" + retries: 18 + delay: 5 + when: test_pvc_creation is succeeded + ignore_errors: yes + tags: precheck + + - name: Wait for test Pod to be running (max 90 seconds) + kubernetes.core.k8s_info: + api_version: v1 + kind: Pod + name: test-pod-precheck + namespace: default + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + register: test_pod_status + until: test_pod_status.resources[0].status.phase in ["Running", "Succeeded"] + retries: 18 + delay: 5 + when: test_pod_creation is succeeded + ignore_errors: yes + tags: precheck + + - name: Display test PVC status + debug: + msg: + - "{{ '[PASS] Test PVC: Bound' if test_pvc_status is succeeded else '[WARN] Test PVC: Not bound within timeout' }}" + - "{{ 'PVC Phase: ' + test_pvc_status.resources[0].status.phase if test_pvc_status.resources is defined and test_pvc_status.resources | length > 0 else 'PVC Phase: Unknown' }}" + - "{{ 'StorageClass Used: ' + (default_storage_class if default_storage_class else storage_classes[0]) if storage_classes is defined else '' }}" + when: test_pvc_creation is defined + tags: precheck + + - name: Display test Pod status + debug: + msg: + - "{{ '[PASS] Test Pod: Running/Succeeded' if test_pod_status is succeeded else '[WARN] Test Pod: Pending/Failed within timeout' }}" + - "{{ 'Pod Phase: 
' + test_pod_status.resources[0].status.phase if test_pod_status.resources is defined and test_pod_status.resources | length > 0 else 'Pod Phase: Unknown' }}" + when: test_pod_creation is defined + tags: precheck + + - name: Describe test PVC for troubleshooting + command: kubectl describe pvc test-pvc-precheck -n default + register: test_pvc_describe + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: test_pvc_creation is succeeded and test_pvc_status is failed + ignore_errors: yes + tags: precheck + + - name: Display PVC troubleshooting info + debug: + msg: "{{ test_pvc_describe.stdout_lines }}" + when: + - test_pvc_describe is defined + - test_pvc_describe is not skipped + - test_pvc_describe.rc is defined + - test_pvc_describe.rc == 0 + tags: precheck + + - name: Describe test Pod for troubleshooting + command: kubectl describe pod test-pod-precheck -n default + register: test_pod_describe + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: test_pod_creation is succeeded and test_pod_status is failed + ignore_errors: yes + tags: precheck + + - name: Display Pod troubleshooting info + debug: + msg: "{{ test_pod_describe.stdout_lines }}" + when: + - test_pod_describe is defined + - test_pod_describe is not skipped + - test_pod_describe.rc is defined + - test_pod_describe.rc == 0 + tags: precheck + + - name: Clean up test Pod + kubernetes.core.k8s: + state: absent + api_version: v1 + kind: Pod + name: test-pod-precheck + namespace: default + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: test_pod_creation is defined + ignore_errors: yes + tags: precheck + + - name: Clean up test PVC + kubernetes.core.k8s: + state: absent + api_version: v1 + kind: PersistentVolumeClaim + name: test-pvc-precheck + namespace: default + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ 
setup_user }}/.kube/config" + when: test_pvc_creation is defined + ignore_errors: yes + tags: precheck + + when: kubeconfig_check.stat.exists and cluster_info_check is defined and cluster_info_check.rc == 0 + + # Ingress/Route Controller Prechecks + - name: Check Ingress Controller for Vanilla Kubernetes + block: + - name: Check for IngressClass resources + command: kubectl get ingressclass -o json + register: ingress_class_check + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + tags: precheck + + - name: Parse IngressClass information + set_fact: + ingress_classes: "{{ (ingress_class_check.stdout | from_json)['items'] | map(attribute='metadata.name') | list }}" + default_ingress_class: "{{ (ingress_class_check.stdout | from_json)['items'] | selectattr('metadata.annotations.ingressclass.kubernetes.io/is-default-class', 'defined') | selectattr('metadata.annotations.ingressclass.kubernetes.io/is-default-class', 'equalto', 'true') | map(attribute='metadata.name') | first | default('') }}" + when: ingress_class_check.rc == 0 + tags: precheck + + - name: Check for common ingress controllers (NGINX) + shell: kubectl get pods -A -o json | jq -r '.items[] | select(.metadata.name | test("ingress-nginx|nginx-ingress")) | "\(.metadata.namespace)/\(.metadata.name)"' + register: nginx_ingress_pods + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + tags: precheck + + - name: Display ingress controller information + debug: + msg: + - "{{ '[PASS] IngressClass Found: ' + (ingress_classes | join(', ')) if ingress_classes is defined and ingress_classes | length > 0 else '[WARN] No IngressClass found' }}" + - "{{ 'Default IngressClass: ' + default_ingress_class if default_ingress_class else 'No default IngressClass (will need deploy_ingress_controller=on)' }}" + - "{{ '[PASS] NGINX Ingress 
Controller pods: ' + nginx_ingress_pods.stdout if nginx_ingress_pods.stdout else 'No NGINX ingress pods detected' }}" + when: ingress_class_check.rc == 0 + tags: precheck + + # Test NGINX Ingress Controller connectivity + - name: Get NGINX Ingress Controller service endpoint + shell: | + kubectl get svc -A -l app.kubernetes.io/name=ingress-nginx -o json 2>/dev/null | \ + jq -r '.items[0] | "\(.metadata.namespace)/\(.metadata.name):\(.spec.ports[] | select(.name=="http") | .port)"' 2>/dev/null || \ + kubectl get svc -n ingress-nginx ingress-nginx-controller -o json 2>/dev/null | \ + jq -r '"\(.metadata.namespace)/\(.metadata.name):\(.spec.ports[] | select(.name=="http") | .port)"' 2>/dev/null || \ + echo "not-found" + register: nginx_ingress_endpoint + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: nginx_ingress_pods.stdout + tags: precheck + + - name: Parse NGINX Ingress endpoint + set_fact: + nginx_namespace: "{{ nginx_ingress_endpoint.stdout.split('/')[0] }}" + nginx_service: "{{ nginx_ingress_endpoint.stdout.split('/')[1].split(':')[0] }}" + nginx_port: "{{ nginx_ingress_endpoint.stdout.split(':')[1] }}" + when: nginx_ingress_endpoint is defined and nginx_ingress_endpoint.stdout != "not-found" and nginx_ingress_pods.stdout + tags: precheck + + - name: Test NGINX Ingress Controller connectivity + shell: | + kubectl run test-curl-nginx --image=curlimages/curl:latest --rm -i --restart=Never --timeout=30s -- \ + curl -s -o /dev/null -w "%{http_code}" http://{{ nginx_service }}.{{ nginx_namespace }}.svc.cluster.local:{{ nginx_port }} --max-time 10 || echo "000" + register: nginx_connectivity_test + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: nginx_service is defined and nginx_namespace is defined and nginx_port is defined + ignore_errors: yes + tags: precheck + 
+ - name: Display NGINX Ingress connectivity test result + debug: + msg: + - "{{ '[PASS] NGINX Ingress Connectivity: Reachable (HTTP ' + nginx_connectivity_test.stdout.strip() + ')' if nginx_connectivity_test is defined and nginx_connectivity_test.stdout.strip() in ['200', '404', '503'] else '[WARN] NGINX Ingress Connectivity: Not reachable or timeout' }}" + - "Endpoint tested: http://{{ nginx_service }}.{{ nginx_namespace }}.svc.cluster.local:{{ nginx_port }}" + when: nginx_connectivity_test is defined + tags: precheck + + # Functional Test: Create test Ingress to verify external accessibility + - name: Create test web server pod for ingress validation + kubernetes.core.k8s: + state: present + definition: + apiVersion: v1 + kind: Pod + metadata: + name: test-ingress-web + namespace: default + labels: + app: test-ingress-web + spec: + restartPolicy: Never + containers: + - name: nginx + image: nginx:alpine + ports: + - containerPort: 80 + command: ["/bin/sh", "-c"] + args: + - | + echo "Ingress test successful - $(date)" > /usr/share/nginx/html/index.html + nginx -g 'daemon off;' + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: ingress_classes is defined and ingress_classes | length > 0 + register: test_ingress_pod_creation + ignore_errors: yes + tags: precheck + + - name: Create test service for ingress validation + kubernetes.core.k8s: + state: present + definition: + apiVersion: v1 + kind: Service + metadata: + name: test-ingress-web + namespace: default + spec: + selector: + app: test-ingress-web + ports: + - protocol: TCP + port: 80 + targetPort: 80 + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: test_ingress_pod_creation is succeeded + register: test_ingress_service_creation + ignore_errors: yes + tags: precheck + + - name: Wait for test ingress pod to be running (max 60 seconds) + kubernetes.core.k8s_info: + api_version: v1 + kind: Pod + 
name: test-ingress-web + namespace: default + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + register: test_ingress_pod_status + until: test_ingress_pod_status.resources[0].status.phase == "Running" + retries: 12 + delay: 5 + when: test_ingress_pod_creation is succeeded + ignore_errors: yes + tags: precheck + + - name: Create test Ingress resource + kubernetes.core.k8s: + state: present + definition: + apiVersion: networking.k8s.io/v1 + kind: Ingress + metadata: + name: test-ingress-precheck + namespace: default + annotations: + nginx.ingress.kubernetes.io/rewrite-target: / + spec: + ingressClassName: "{{ default_ingress_class if default_ingress_class else ingress_classes[0] }}" + rules: + - http: + paths: + - path: /test-ingress + pathType: Prefix + backend: + service: + name: test-ingress-web + port: + number: 80 + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: test_ingress_service_creation is succeeded and test_ingress_pod_status is succeeded + register: test_ingress_creation + ignore_errors: yes + tags: precheck + + - name: Wait for Ingress to get an address/hostname (max 60 seconds) + kubernetes.core.k8s_info: + api_version: networking.k8s.io/v1 + kind: Ingress + name: test-ingress-precheck + namespace: default + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + register: test_ingress_status + until: > + test_ingress_status.resources[0].status.loadBalancer.ingress is defined and + test_ingress_status.resources[0].status.loadBalancer.ingress | length > 0 + retries: 12 + delay: 5 + when: test_ingress_creation is succeeded + ignore_errors: yes + tags: precheck + + - name: Extract Ingress address + set_fact: + ingress_address: >- + {{ test_ingress_status.resources[0].status.loadBalancer.ingress[0].ip + if test_ingress_status.resources[0].status.loadBalancer.ingress[0].ip is defined + else 
test_ingress_status.resources[0].status.loadBalancer.ingress[0].hostname }} + when: test_ingress_status is succeeded and test_ingress_status.resources[0].status.loadBalancer.ingress is defined + tags: precheck + + - name: Test external Ingress accessibility + uri: + url: "http://{{ ingress_address }}/test-ingress" + method: GET + return_content: yes + status_code: [200, 404] + timeout: 30 + register: ingress_external_test + when: ingress_address is defined + ignore_errors: yes + tags: precheck + + - name: Display Ingress functional test results + debug: + msg: + - "{{ '[PASS] Ingress Resource: Created and assigned address' if test_ingress_status is succeeded and ingress_address is defined else '[WARN] Ingress Resource: No address assigned' }}" + - "{{ 'Ingress Address: ' + ingress_address if ingress_address is defined else 'Ingress Address: Not available' }}" + - "{{ '[PASS] External Ingress Accessibility: Reachable (HTTP ' + (ingress_external_test.status | string) + ')' if ingress_external_test is succeeded else '[WARN] External Ingress Accessibility: Not reachable or timeout' }}" + - "{{ 'IngressClass Used: ' + (default_ingress_class if default_ingress_class else ingress_classes[0]) if ingress_classes is defined else '' }}" + when: test_ingress_creation is defined + tags: precheck + + - name: Describe test Ingress for troubleshooting + command: kubectl describe ingress test-ingress-precheck -n default + register: test_ingress_describe + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: test_ingress_creation is succeeded and (test_ingress_status is failed or ingress_external_test is failed) + ignore_errors: yes + tags: precheck + + - name: Display Ingress troubleshooting info + debug: + msg: "{{ test_ingress_describe.stdout_lines }}" + when: + - test_ingress_describe is defined + - test_ingress_describe is not skipped + - test_ingress_describe.rc is defined + - test_ingress_describe.rc == 0 + tags: precheck 
+ + - name: Clean up test Ingress + kubernetes.core.k8s: + state: absent + api_version: networking.k8s.io/v1 + kind: Ingress + name: test-ingress-precheck + namespace: default + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: test_ingress_creation is defined + ignore_errors: yes + tags: precheck + + - name: Clean up test Service + kubernetes.core.k8s: + state: absent + api_version: v1 + kind: Service + name: test-ingress-web + namespace: default + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: test_ingress_service_creation is defined + ignore_errors: yes + tags: precheck + + - name: Clean up test Pod + kubernetes.core.k8s: + state: absent + api_version: v1 + kind: Pod + name: test-ingress-web + namespace: default + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: test_ingress_pod_creation is defined + ignore_errors: yes + tags: precheck + + when: + - kubeconfig_check.stat.exists + - cluster_info_check is defined and cluster_info_check.rc == 0 + - is_vanilla_k8s is defined and is_vanilla_k8s + + # OpenShift Route Controller Prechecks + - name: Check OpenShift Router + block: + - name: Check for OpenShift router pods + command: kubectl get pods -n openshift-ingress -l ingresscontroller.operator.openshift.io/deployment-ingresscontroller=default -o json + register: openshift_router_check + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + tags: precheck + + - name: Parse OpenShift router status + set_fact: + router_pods: "{{ (openshift_router_check.stdout | from_json)['items'] | map(attribute='metadata.name') | list }}" + router_ready: "{{ (openshift_router_check.stdout | from_json)['items'] | selectattr('status.phase', 'defined') | selectattr('status.phase', 'equalto', 'Running') | list | length }}" + 
when: openshift_router_check.rc == 0 + tags: precheck + + - name: Display OpenShift router status + debug: + msg: + - "{{ '[PASS] OpenShift Router: ' + (router_ready | string) + ' pod(s) running' if router_ready is defined and router_ready | int > 0 else '[FAIL] OpenShift Router: Not running or not found' }}" + - "Router pods: {{ router_pods | join(', ') if router_pods is defined else 'None' }}" + when: openshift_router_check.rc == 0 + tags: precheck + + - name: Check route.openshift.io API availability + command: kubectl api-resources --api-group=route.openshift.io + register: route_api_check + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + tags: precheck + + - name: Display route API status + debug: + msg: "{{ '[PASS] OpenShift Route API: Available' if route_api_check.rc == 0 else '[FAIL] OpenShift Route API: Not available' }}" + tags: precheck + + # Test OpenShift Router connectivity + - name: Get OpenShift Router service endpoint + shell: | + kubectl get svc -n openshift-ingress router-default -o json 2>/dev/null | \ + jq -r '"\(.metadata.namespace)/\(.metadata.name):\(.spec.ports[] | select(.name=="http") | .port)"' 2>/dev/null || \ + kubectl get svc -n openshift-ingress -l ingresscontroller.operator.openshift.io/deployment-ingresscontroller=default -o json 2>/dev/null | \ + jq -r '.items[0] | "\(.metadata.namespace)/\(.metadata.name):\(.spec.ports[] | select(.name=="http") | .port)"' 2>/dev/null || \ + echo "not-found" + register: router_endpoint + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: openshift_router_check.rc == 0 and router_ready is defined and router_ready | int > 0 + tags: precheck + + - name: Parse OpenShift Router endpoint + set_fact: + router_namespace: "{{ router_endpoint.stdout.split('/')[0] }}" + router_service: "{{ 
router_endpoint.stdout.split('/')[1].split(':')[0] }}" + router_port: "{{ router_endpoint.stdout.split(':')[1] }}" + when: router_endpoint is defined and router_endpoint.stdout != "not-found" + tags: precheck + + - name: Test OpenShift Router connectivity + shell: | + kubectl run test-curl-router --image=curlimages/curl:latest --rm -i --restart=Never --timeout=30s -- \ + curl -s -o /dev/null -w "%{http_code}" http://{{ router_service }}.{{ router_namespace }}.svc.cluster.local:{{ router_port }} --max-time 10 || echo "000" + register: router_connectivity_test + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: router_service is defined and router_namespace is defined and router_port is defined + ignore_errors: yes + tags: precheck + + - name: Display OpenShift Router connectivity test result + debug: + msg: + - "{{ '[PASS] OpenShift Router Connectivity: Reachable (HTTP ' + router_connectivity_test.stdout.strip() + ')' if router_connectivity_test is defined and router_connectivity_test.stdout.strip() in ['200', '404', '503'] else '[WARN] OpenShift Router Connectivity: Not reachable or timeout' }}" + - "Endpoint tested: http://{{ router_service }}.{{ router_namespace }}.svc.cluster.local:{{ router_port }}" + when: router_connectivity_test is defined + tags: precheck + + when: + - kubeconfig_check.stat.exists + - cluster_info_check is defined and cluster_info_check.rc == 0 + - is_openshift is defined and is_openshift + + # EKS-Specific Prechecks + - name: Check EKS-specific requirements + block: + - name: Check for AWS Load Balancer Controller + shell: kubectl get pods -A -o json | jq -r '.items[] | select(.metadata.name | test("aws-load-balancer-controller")) | "\(.metadata.namespace)/\(.metadata.name)"' + register: alb_controller_check + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user 
}}/.kube/config"
          tags: precheck

        - name: Check for EBS CSI Driver
          command: kubectl get csidriver ebs.csi.aws.com
          register: ebs_csi_check
          failed_when: false
          changed_when: false
          become_user: "{{ setup_user }}"
          environment:
            KUBECONFIG: "/home/{{ setup_user }}/.kube/config"
          tags: precheck

        - name: Check AWS credentials in environment
          shell: |
            if [ -n "$AWS_ACCESS_KEY_ID" ] && [ -n "$AWS_SECRET_ACCESS_KEY" ]; then
              echo "configured"
            else
              echo "missing"
            fi
          register: aws_creds_check
          changed_when: false
          tags: precheck

        - name: Check if inference-config.cfg exists for EKS certificate validation
          stat:
            path: "{{ playbook_dir }}/../inventory/inference-config.cfg"
          register: eks_config_file_check
          tags: precheck

        - name: Read AWS certificate ARN from inference-config.cfg
          shell: grep '^aws_certificate_arn=' {{ playbook_dir }}/../inventory/inference-config.cfg | cut -d'=' -f2- | tr -d ' \t\r\n'
          register: aws_cert_arn_output
          when: eks_config_file_check.stat.exists
          changed_when: false
          failed_when: false
          tags: precheck

        - name: Set AWS certificate ARN fact
          set_fact:
            aws_certificate_arn_value: "{{ aws_cert_arn_output.stdout | default('') | trim }}"
          when: aws_cert_arn_output is defined and aws_cert_arn_output.stdout is defined
          tags: precheck

        - name: Set empty AWS certificate ARN fact if not found
          set_fact:
            aws_certificate_arn_value: ""
          when: aws_cert_arn_output is not defined or aws_cert_arn_output.stdout is not defined
          tags: precheck

        - name: Validate AWS certificate ARN for ALB (EKS)
          assert:
            that:
              - aws_certificate_arn_value is defined
              - aws_certificate_arn_value | length > 0
              - aws_certificate_arn_value is match('^arn:aws:acm:[a-z0-9-]+:\\d{12}:certificate/.+')
            fail_msg: |
              [FAIL] ALB certificate ARN missing or invalid for EKS.
              Set aws_certificate_arn in inference-config.cfg (DEFAULT section) to a valid ACM certificate ARN. 
+
            success_msg: "[PASS] ALB certificate ARN is configured"
          register: aws_certificate_arn_validation
          when: eks_config_file_check.stat.exists
          ignore_errors: yes
          tags: precheck

        - name: Determine AWS region for ACM validation
          set_fact:
            aws_region_value: "{{ (lookup('env', 'AWS_REGION') | default('', true)) or (lookup('env', 'AWS_DEFAULT_REGION') | default('', true)) }}"
          tags: precheck

        - name: Display EKS-specific checks
          debug:
            msg:
              - "{{ '[PASS] AWS Load Balancer Controller: Found - ' + alb_controller_check.stdout if alb_controller_check.stdout else '[FAIL] AWS Load Balancer Controller: Not found' }}"
              - "{{ '[PASS] EBS CSI Driver: Installed' if ebs_csi_check.rc == 0 else '[FAIL] EBS CSI Driver: Not found' }}"
              - "{{ '[PASS] AWS Credentials: Configured in environment' if aws_creds_check.stdout == 'configured' else '[FAIL] AWS Credentials: Missing (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)' }}"
              - "{{ '[PASS] ALB TLS Certificate ARN: ' + aws_certificate_arn_value if (aws_certificate_arn_validation is defined and aws_certificate_arn_validation is succeeded and aws_certificate_arn_value is defined and aws_certificate_arn_value | length > 0) else '[FAIL] ALB TLS Certificate ARN: Missing/invalid in inference-config.cfg' }}"
              - >-
                {% if aws_certificate_arn_value is not defined or aws_certificate_arn_value | length == 0 %}
                [FAIL] ACM Certificate Validation: ARN missing in inference-config.cfg
                {% elif aws_region_value is not defined or aws_region_value | length == 0 %}
                [FAIL] ACM Certificate Validation: AWS_REGION/AWS_DEFAULT_REGION not set for validation
                {% elif acm_certificate_status is defined and acm_certificate_status is succeeded and (acm_certificate_status.stdout | trim) == 'ISSUED' %}
                [PASS] ACM Certificate Validation: Found and ISSUED in {{ aws_region_value }}
                {% else %}
                [FAIL] ACM Certificate Validation: 
Not found or not ISSUED in {{ aws_region_value | default('unknown region') }} + {% endif %} + tags: precheck + + # Test AWS Load Balancer Controller connectivity + - name: Get AWS Load Balancer Controller webhook service + shell: | + kubectl get svc -A -l app.kubernetes.io/name=aws-load-balancer-controller -o json 2>/dev/null | \ + jq -r '.items[0] | "\(.metadata.namespace)/\(.metadata.name):\(.spec.ports[0].port)"' 2>/dev/null || \ + echo "not-found" + register: alb_controller_endpoint + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: alb_controller_check.stdout + tags: precheck + + - name: Parse AWS Load Balancer Controller endpoint + set_fact: + alb_namespace: "{{ alb_controller_endpoint.stdout.split('/')[0] }}" + alb_service: "{{ alb_controller_endpoint.stdout.split('/')[1].split(':')[0] }}" + alb_port: "{{ alb_controller_endpoint.stdout.split(':')[1] }}" + when: alb_controller_endpoint is defined and alb_controller_endpoint.stdout != "not-found" + tags: precheck + + - name: Test AWS Load Balancer Controller webhook connectivity + shell: | + kubectl run test-curl-alb --image=curlimages/curl:latest --rm -i --restart=Never --timeout=30s -- \ + curl -s -o /dev/null -w "%{http_code}" -k https://{{ alb_service }}.{{ alb_namespace }}.svc.cluster.local:{{ alb_port }}/healthz --max-time 10 || echo "000" + register: alb_connectivity_test + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: alb_service is defined and alb_namespace is defined and alb_port is defined + ignore_errors: yes + tags: precheck + + - name: Display AWS Load Balancer Controller connectivity test result + debug: + msg: + - "{{ '[PASS] AWS LB Controller Connectivity: Reachable (HTTP ' + alb_connectivity_test.stdout.strip() + ')' if alb_connectivity_test is defined and 
alb_connectivity_test.stdout.strip() in ['200', '404'] else '[WARN] AWS LB Controller Connectivity: Not reachable or timeout' }}" + - "Endpoint tested: https://{{ alb_service }}.{{ alb_namespace }}.svc.cluster.local:{{ alb_port }}/healthz" + when: alb_connectivity_test is defined + tags: precheck + + when: + - kubeconfig_check.stat.exists + - cluster_info_check is defined and cluster_info_check.rc == 0 + - detected_platform is defined and 'EKS' in detected_platform + + # Istio Service Mesh Prechecks + - name: Check for existing Istio installation + block: + - name: Check if istio-system namespace exists + command: kubectl get namespace istio-system -o json + register: istio_namespace_check + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + tags: precheck + + - name: Check for istiod deployment + command: kubectl get deployment -n istio-system istiod -o json + register: istiod_check + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: istio_namespace_check.rc == 0 + tags: precheck + + - name: Parse Istio deployment status + set_fact: + istio_installed: "{{ istio_namespace_check.rc == 0 and istiod_check is defined and istiod_check is not skipped and istiod_check.rc is defined and istiod_check.rc == 0 }}" + istiod_replicas: "{{ (istiod_check.stdout | from_json).status.replicas | default(0) if istiod_check is defined and istiod_check is not skipped and istiod_check.rc is defined and istiod_check.rc == 0 else 0 }}" + istiod_ready_replicas: "{{ (istiod_check.stdout | from_json).status.readyReplicas | default(0) if istiod_check is defined and istiod_check is not skipped and istiod_check.rc is defined and istiod_check.rc == 0 else 0 }}" + when: istio_namespace_check is defined + tags: precheck + + - name: Check for Istio version (if installed) + shell: | + kubectl get deployment 
istiod -n istio-system -o jsonpath='{.spec.template.spec.containers[0].image}' 2>/dev/null | grep -oP '(?<=:)[0-9]+\.[0-9]+\.[0-9]+' || echo "unknown" + register: istio_version_check + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + when: istio_installed is defined and istio_installed + tags: precheck + + - name: Get deploy_istio config value + shell: grep '^deploy_istio=' {{ playbook_dir }}/../inventory/inference-config.cfg | cut -d'=' -f2 + register: deploy_istio_config + failed_when: false + changed_when: false + tags: precheck + + - name: Display Istio detection results + debug: + msg: "{{ istio_messages | reject('equalto', '') | list }}" + vars: + istio_messages: + - "=========================================" + - "ISTIO SERVICE MESH DETECTION" + - "=========================================" + - "{{ 'Status: Istio is already installed in the cluster' if istio_installed else 'Status: No Istio installation detected' }}" + - "{{ 'Namespace: istio-system (exists)' if istio_namespace_check.rc == 0 else 'Namespace: istio-system (not found)' }}" + - "{{ 'Istiod Deployment: ' + (istiod_ready_replicas | string) + '/' + (istiod_replicas | string) + ' replicas ready' if istio_installed else 'Istiod Deployment: Not found' }}" + - "{{ 'Istio Version: ' + istio_version_check.stdout if istio_installed and istio_version_check is defined and istio_version_check.stdout != 'unknown' else 'Istio Version: Not applicable' }}" + - "" + - "Current Configuration: deploy_istio={{ deploy_istio_config.stdout if deploy_istio_config.stdout else 'not set' }}" + - "" + - "{{ 'IMPORTANT - When Istio is already installed:' if istio_installed else ('NOTE - Istio is not supported on OpenShift through this automation' if (is_openshift is defined and is_openshift) else 'NOTE - If you want to use Istio (EKS/Vanilla K8s only):') }}" + - "{{ ' • Configuration will be auto-updated to deploy_istio=off' if 
istio_installed else ('' if (is_openshift is defined and is_openshift) else ' • Set deploy_istio=on in inference-config.cfg') }}" + - "{{ ' • Deployment will use the existing Istio mesh' if istio_installed else ('' if (is_openshift is defined and is_openshift) else ' • Automation will install Istio during deployment') }}" + - "{{ ' • Existing Istio configuration will be preserved' if istio_installed else '' }}" + - "{{ ' • Ensure compatibility with existing version (recommended: 1.20+)' if istio_installed else '' }}" + - "{{ ' • Verify ingress gateway and network policies are configured' if istio_installed else '' }}" + - "=========================================" + when: istio_namespace_check is defined + tags: precheck + + - name: Auto-update inference-config.cfg when Istio is already installed + lineinfile: + path: "{{ playbook_dir }}/../inventory/inference-config.cfg" + regexp: '^deploy_istio=' + line: 'deploy_istio=off' + create: no + when: + - istio_installed is defined + - istio_installed + tags: precheck + + - name: Display Istio conflict auto-resolution + debug: + msg: + - "=========================================" + - "ISTIO CONFLICT AUTO-RESOLVED" + - "=========================================" + - "Detected: Istio is already installed in the cluster (version {{ istio_version_check.stdout if istio_version_check is defined and istio_version_check.stdout != 'unknown' else 'detected' }})" + - "" + - "ACTION TAKEN:" + - " Automatically set deploy_istio=off in inference-config.cfg" + - " to use the existing Istio service mesh." + - "" + - "IMPORTANT - SIDE EFFECTS TO BE AWARE OF:" + - " 1. Deployment will NOT install or modify Istio" + - " 2. Existing Istio configuration will be preserved" + - " 3. Application will use the existing Istio mesh" + - " 4. Ensure existing Istio version is compatible (recommended: 1.20+)" + - " 5. Verify Istio ingress gateway is properly configured" + - " 6. 
Network policies may need manual adjustment for Istio ambient mode" + - "" + - "STATUS:" + - " Previous config: deploy_istio={{ deploy_istio_config.stdout if deploy_istio_config.stdout else 'not set' }}" + - " Updated config: deploy_istio=off" + - "" + - "Proceeding with deployment using existing Istio installation..." + - "=========================================" + when: + - istio_installed is defined + - istio_installed + tags: precheck + + when: + - kubeconfig_check.stat.exists + - cluster_info_check is defined and cluster_info_check.rc == 0 + + # Node Label Prechecks + - name: Check required node labels + block: + - name: Check for nodes with ei-infra-eligible label + shell: kubectl get nodes -l ei-infra-eligible=true -o json | jq -r '.items | length' + register: infra_nodes_check + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + tags: precheck + + - name: Check for nodes with ei-inference-eligible label + shell: kubectl get nodes -l ei-inference-eligible=true -o json | jq -r '.items | length' + register: inference_nodes_check + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + tags: precheck + + - name: Get node names with labels + shell: | + echo "=== Infra Nodes (ei-infra-eligible=true) ===" + kubectl get nodes -l ei-infra-eligible=true -o custom-columns=NAME:.metadata.name,STATUS:.status.conditions[-1].type --no-headers 2>/dev/null || echo "None" + echo "" + echo "=== Inference Nodes (ei-inference-eligible=true) ===" + kubectl get nodes -l ei-inference-eligible=true -o custom-columns=NAME:.metadata.name,STATUS:.status.conditions[-1].type --no-headers 2>/dev/null || echo "None" + register: labeled_nodes_list + failed_when: false + changed_when: false + become_user: "{{ setup_user }}" + environment: + KUBECONFIG: "/home/{{ setup_user }}/.kube/config" + tags: precheck + + 
- name: Validate node labels + debug: + msg: + - "{{ '[PASS] Infrastructure Nodes (ei-infra-eligible=true): ' + infra_nodes_check.stdout + ' node(s) found' if infra_nodes_check.stdout | int > 0 else '[FAIL] No nodes with ei-infra-eligible=true label found' }}" + - "{{ '[PASS] Inference Nodes (ei-inference-eligible=true): ' + inference_nodes_check.stdout + ' node(s) found' if inference_nodes_check.stdout | int > 0 else '[FAIL] No nodes with ei-inference-eligible=true label found' }}" + - "{{ labeled_nodes_list.stdout_lines | join('\n') }}" + tags: precheck + + - name: Assert required node labels exist + assert: + that: + - infra_nodes_check.stdout | int > 0 + - inference_nodes_check.stdout | int > 0 + fail_msg: | + ======================================== + CRITICAL: Missing Required Node Labels + ======================================== + Required node labels for workload scheduling: + - ei-infra-eligible=true (for Keycloak, APISIX, GenAI Gateway, Observability) + - ei-inference-eligible=true (for LLM model workloads) + + Current state: + - ei-infra-eligible=true nodes: {{ infra_nodes_check.stdout }} + - ei-inference-eligible=true nodes: {{ inference_nodes_check.stdout }} + + Without proper labels, pods will remain in Pending state! 
+
+
            To add labels, run:
              kubectl label nodes <node-name> ei-infra-eligible=true
              kubectl label nodes <node-name> ei-inference-eligible=true

            Or use the automated labeling playbook:
              ansible-playbook playbooks/label-nodes.yml
            ========================================
          success_msg: "[PASS] Required node labels are properly configured"
          register: node_labels_validation
          ignore_errors: yes
          tags: precheck

      when: kubeconfig_check.stat.exists and cluster_info_check is defined and cluster_info_check.rc == 0

    # Resource Availability Prechecks
    - name: Check cluster resource availability
      block:
        - name: Get total cluster resources
          shell: |
            kubectl top nodes 2>/dev/null || kubectl get nodes -o json | jq -r '.items[] | "\(.metadata.name): CPU=\(.status.capacity.cpu) Memory=\(.status.capacity.memory)"'
          register: cluster_resources
          failed_when: false
          changed_when: false
          become_user: "{{ setup_user }}"
          environment:
            KUBECONFIG: "/home/{{ setup_user }}/.kube/config"
          tags: precheck

        - name: Display cluster resources
          debug:
            msg:
              - "Cluster Resource Summary:"
              - "{{ cluster_resources.stdout_lines | join('\n') }}"
          when: cluster_resources.rc == 0
          tags: precheck

      when: kubeconfig_check.stat.exists and cluster_info_check is defined and cluster_info_check.rc == 0

    # Configuration File Validation
    - name: Validate inference configuration file
      block:
        - name: Check if inference-config.cfg exists
          stat:
            path: "{{ playbook_dir }}/../inventory/inference-config.cfg"
          register: config_file_check
          tags: precheck

        - name: Read and validate configuration parameters
          shell: |
            CONFIG_FILE="{{ playbook_dir }}/../inventory/inference-config.cfg"
            if [ -f "$CONFIG_FILE" ]; then
              echo "=== Required Configuration Parameters ==="
              echo "cluster_url: $(grep '^cluster_url=' $CONFIG_FILE | cut -d'=' -f2)"
              echo "cert_file: $(grep '^cert_file=' $CONFIG_FILE | cut -d'=' -f2)"
              echo "key_file: $(grep '^key_file=' $CONFIG_FILE | cut -d'=' -f2)"
              echo ""
              echo 
"=== Deployment Flags ===" + echo "deploy_ingress_controller: $(grep '^deploy_ingress_controller=' $CONFIG_FILE | cut -d'=' -f2)" + echo "deploy_llm_models: $(grep '^deploy_llm_models=' $CONFIG_FILE | cut -d'=' -f2)" + echo "models: $(grep '^models=' $CONFIG_FILE | cut -d'=' -f2)" + else + echo "Configuration file not found" + fi + register: config_validation + failed_when: false + changed_when: false + tags: precheck + + - name: Display configuration validation + debug: + msg: "{{ config_validation.stdout_lines }}" + when: config_file_check.stat.exists + tags: precheck + + when: kubeconfig_check.stat.exists + + # DNS Resolution Precheck (if cluster_url is configured) + - name: Check DNS resolution for cluster URL + block: + - name: Get cluster_url from config + shell: grep '^cluster_url=' {{ playbook_dir }}/../inventory/inference-config.cfg | cut -d'=' -f2 + register: cluster_url_value + failed_when: false + changed_when: false + tags: precheck + + - name: Test DNS resolution + shell: | + if [ -n "{{ cluster_url_value.stdout }}" ]; then + nslookup {{ cluster_url_value.stdout }} 2>/dev/null || host {{ cluster_url_value.stdout }} 2>/dev/null || echo "DNS resolution failed" + fi + register: dns_check + failed_when: false + changed_when: false + when: cluster_url_value.stdout is defined and cluster_url_value.stdout != "" + tags: precheck + + - name: Display DNS check result + debug: + msg: + - "DNS Check for {{ cluster_url_value.stdout }}:" + - "{{ dns_check.stdout if dns_check is defined else 'Skipped - no cluster_url configured' }}" + when: cluster_url_value.stdout is defined and cluster_url_value.stdout != "" + tags: precheck + + when: kubeconfig_check.stat.exists + + # Precheck Summary + - name: Display comprehensive precheck summary + debug: + msg: "{{ messages | reject('equalto', '') | list }}" + vars: + messages: + - "========================================" + - "BROWNFIELD DEPLOYMENT PRECHECK SUMMARY" + - "========================================" + - "{{ 
'[PASS]' if os_check is succeeded else '[FAIL]' }} Deployment Machine OS: {{ ansible_distribution }} {{ ansible_distribution_version }}" + - "{{ '[PASS]' if cluster_info_check is defined and cluster_info_check.rc == 0 else '[FAIL]' }} Cluster API Connection: {{ 'Connected' if cluster_info_check is defined and cluster_info_check.rc == 0 else 'Failed' }}" + - "{{ '[PASS]' if detected_platform is defined else '[INFO]' }} Detected Platform: {{ detected_platform if detected_platform is defined else 'Unknown' }}" + - "{{ '[PASS]' if storage_classes is defined and storage_classes | length > 0 else '[FAIL]' }} StorageClass: {{ storage_classes | length if storage_classes is defined else 0 }} available" + - "{{ '[PASS]' if (is_openshift is defined and is_openshift and route_api_check is defined and route_api_check.rc == 0) or (is_vanilla_k8s is defined and is_vanilla_k8s and ingress_classes is defined and ingress_classes | length > 0) or (detected_platform is defined and 'EKS' in detected_platform and alb_controller_check is defined and alb_controller_check.stdout) else '[INFO]' }} Ingress/Router: {{ 'OpenShift Router OK' if is_openshift is defined and is_openshift else ('IngressClass available' if ingress_classes is defined and ingress_classes | length > 0 else ('ALB Controller OK' if detected_platform is defined and 'EKS' in detected_platform and alb_controller_check is defined and alb_controller_check.stdout else 'Check required')) }}" + - "{{ '[INFO]' if istio_installed is defined and istio_installed else '[INFO]' }} Istio Service Mesh: {{ 'Installed (v' + istio_version_check.stdout + ') - recommend deploy_istio=off' if istio_installed is defined and istio_installed and istio_version_check is defined and istio_version_check.stdout != 'unknown' else ('Installed - recommended to set deploy_istio=off to avoid conflicts' if istio_installed is defined and istio_installed else 'Not installed') }}" + - "{{ (('[PASS]' if aws_certificate_arn_validation is defined and 
aws_certificate_arn_validation is succeeded else '[FAIL]') + ' EKS Certificate ARN: ' + ('Configured' if aws_certificate_arn_value is defined and aws_certificate_arn_value else 'Missing in inference-config.cfg')) if (detected_platform is defined and 'EKS' in detected_platform) else '' }}" + - "{{ '[PASS]' if infra_nodes_check is defined and infra_nodes_check.stdout | int > 0 else '[FAIL]' }} Node Labels (ei-infra-eligible): {{ infra_nodes_check.stdout if infra_nodes_check is defined else 'N/A' }} nodes" + - "{{ '[PASS]' if inference_nodes_check is defined and inference_nodes_check.stdout | int > 0 else '[FAIL]' }} Node Labels (ei-inference-eligible): {{ inference_nodes_check.stdout if inference_nodes_check is defined else 'N/A' }} nodes" + - "{{ 'All critical prechecks passed!' if (cluster_info_check is defined and cluster_info_check.rc == 0 and storage_classes is defined and storage_classes | length > 0 and infra_nodes_check is defined and infra_nodes_check.stdout | int > 0 and inference_nodes_check is defined and inference_nodes_check.stdout | int > 0 and (detected_platform is not defined or 'EKS' not in detected_platform or (aws_certificate_arn_validation is defined and aws_certificate_arn_validation is succeeded))) else 'CRITICAL: Some prechecks failed - review above for details' }}" + - "========================================" + tags: precheck + + - name: Fail if critical prechecks failed + fail: + msg: | + ======================================== + DEPLOYMENT BLOCKED: Critical Prechecks Failed + ======================================== + One or more critical prechecks have failed. Please review the summary above and resolve the issues before proceeding with deployment. + + Common issues to check: + 1. Cluster API connection - verify KUBECONFIG is correct + 2. Storage classes - ensure at least one StorageClass exists + 3. Node labels - verify nodes are labeled with ei-infra-eligible=true and ei-inference-eligible=true + 4. 
EKS certificate ARN - verify aws_certificate_arn in inference-config.cfg (for EKS clusters) + 5. Ingress controller - verify ingress controller or ALB controller is deployed + + To apply node labels automatically, run: + ansible-playbook playbooks/label-nodes.yml + + Cannot proceed with deployment until all critical checks pass. + ======================================== + when: > + kubeconfig_check.stat.exists and ( + (cluster_info_check is not defined or cluster_info_check.rc != 0) or + (storage_classes is not defined or storage_classes | length == 0) or + (infra_nodes_check is not defined or infra_nodes_check.stdout | int == 0) or + (inference_nodes_check is not defined or inference_nodes_check.stdout | int == 0) or + (detected_platform is defined and 'EKS' in detected_platform and (aws_certificate_arn_validation is not defined or aws_certificate_arn_validation is failed)) + ) + tags: precheck + + when: kubeconfig_check.stat.exists + tags: precheck + - name: Add Kubernetes tools to PATH in bashrc lineinfile: path: /home/{{ setup_user }}/.bashrc diff --git a/core/playbooks/setup-user-kubeconfig.yml b/core/playbooks/setup-user-kubeconfig.yml index 3b216d92..33c33a6d 100644 --- a/core/playbooks/setup-user-kubeconfig.yml +++ b/core/playbooks/setup-user-kubeconfig.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 - hosts: kube_control_plane gather_facts: no diff --git a/core/roles/inference-precheck/tasks/main.yml b/core/roles/inference-precheck/tasks/main.yml index b8d66e42..fffb78bb 100644 --- a/core/roles/inference-precheck/tasks/main.yml +++ b/core/roles/inference-precheck/tasks/main.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Gather OS facts @@ -38,7 +38,7 @@ set_fact: os_supported: >- {{ - (ansible_distribution == "Ubuntu" and 
ansible_distribution_version == "22.04") or + (ansible_distribution == "Ubuntu" and (ansible_distribution_version == "22.04" or ansible_distribution_version == "24.04")) or (ansible_distribution == "RedHat" and ansible_distribution_major_version == "9") }} @@ -52,6 +52,7 @@ Officially supported operating systems: - Ubuntu 22.04 + - Ubuntu 24.04 - RHEL 9 Current system: {{ ansible_distribution }} {{ ansible_distribution_version }} @@ -73,6 +74,7 @@ Officially supported platforms: - Ubuntu 22.04 + - Ubuntu 24.04 - RHEL 9 Do you want to proceed anyway at your own risk? (yes/no) diff --git a/core/roles/inference-tools/meta/main.yml b/core/roles/inference-tools/meta/main.yml index 2957e105..956c73cb 100644 --- a/core/roles/inference-tools/meta/main.yml +++ b/core/roles/inference-tools/meta/main.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- dependencies: diff --git a/core/roles/inference-tools/tasks/main.yml b/core/roles/inference-tools/tasks/main.yml index 241c234e..3a63e774 100644 --- a/core/roles/inference-tools/tasks/main.yml +++ b/core/roles/inference-tools/tasks/main.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Ensure Python pip module is installed diff --git a/core/roles/kubernetes-precheck/tasks/main.yml b/core/roles/kubernetes-precheck/tasks/main.yml index b71032e7..c3f78bca 100644 --- a/core/roles/kubernetes-precheck/tasks/main.yml +++ b/core/roles/kubernetes-precheck/tasks/main.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- - name: Check if kubectl is Available diff --git a/core/roles/nri_cpu_balloons/defaults/main.yaml b/core/roles/nri_cpu_balloons/defaults/main.yaml index 6ea18cb7..10f38a69 100644 --- 
a/core/roles/nri_cpu_balloons/defaults/main.yaml +++ b/core/roles/nri_cpu_balloons/defaults/main.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- diff --git a/core/roles/nri_cpu_balloons/tasks/install_nri.yaml b/core/roles/nri_cpu_balloons/tasks/install_nri.yaml index 15067b5e..b83248c8 100644 --- a/core/roles/nri_cpu_balloons/tasks/install_nri.yaml +++ b/core/roles/nri_cpu_balloons/tasks/install_nri.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- diff --git a/core/roles/nri_cpu_balloons/tasks/install_nri_openshift.yaml b/core/roles/nri_cpu_balloons/tasks/install_nri_openshift.yaml index 3c1876a0..5a12129d 100644 --- a/core/roles/nri_cpu_balloons/tasks/install_nri_openshift.yaml +++ b/core/roles/nri_cpu_balloons/tasks/install_nri_openshift.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- diff --git a/core/roles/nri_cpu_balloons/tasks/main.yaml b/core/roles/nri_cpu_balloons/tasks/main.yaml index 679bd199..aae9f7b1 100644 --- a/core/roles/nri_cpu_balloons/tasks/main.yaml +++ b/core/roles/nri_cpu_balloons/tasks/main.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- @@ -20,6 +20,25 @@ # ============================================================================ # Default Configuration # ============================================================================ + +- name: Auto-compute nri_reserved_cpu_list if not provided + ansible.builtin.include_role: + name: utils + tasks_from: get_optimized_cpu_topology + when: + - nri_reserved_cpu_list is not defined or (nri_reserved_cpu_list | string | length) == 0 + +- name: Debug normalized nri_reserved_cpu_list + 
ansible.builtin.debug: + msg: "nri_reserved_cpu_list={{ nri_reserved_cpu_list }}" + +- name: Validate nri_reserved_cpu_list + ansible.builtin.assert: + that: + - nri_reserved_cpu_list is defined + - (nri_reserved_cpu_list | regex_search('^[0-9]+([,-][0-9]+)*$')) is not none + fail_msg: "nri_reserved_cpu_list is missing or malformed. Expected cpuset syntax like '0,1,32,33'." + - name: Set default NRI CPU balloons configuration if not defined ansible.builtin.set_fact: nri_cpu_balloons: @@ -41,7 +60,6 @@ generic_balloon: name: "vllm-balloon" match_expression: "name=vllm" - preserve_existing: true policy_settings: preferIsolCpus: false preferNewBalloons: true @@ -158,93 +176,77 @@ tags: - install -- name: Calculate expected balloon configuration for comparison +- name: Check if reserved balloon exists ansible.builtin.set_fact: - expected_balloon_config: - name: "{{ generic_balloon.name }}" - namespaces: - - default - matchExpressions: - - key: "{{ generic_balloon.match_expression.split('=')[0] }}" - operator: In - values: - - "{{ generic_balloon.match_expression.split('=')[1] }}" - allocatorPriority: "high" - allocatorTopologyBalancing: true - # CPU resource allocation moved to Helm chart values.yaml resources section - preferSpreadOnPhysicalCores: false - preferIsolCpus: "{{ generic_balloon.policy_settings.preferIsolCpus }}" - preferNewBalloons: "{{ generic_balloon.policy_settings.preferNewBalloons }}" - hideHyperthreads: "{{ generic_balloon.policy_settings.hideHyperthreads }}" - pinCpus: true - pinMemory: false + reserved_balloon_exists: >- + {{ + existing_config.config.balloonTypes | default([]) + | selectattr('name', 'equalto', 'reserved') + | list | length > 0 + }} when: vllm_balloon_exists tags: - install -- name: Re-evaluate if existing balloon configuration needs update +- name: Check if existing config matches simplified balloon structure ansible.builtin.set_fact: balloon_needs_update: >- {{ vllm_balloon_exists and ( - 
existing_balloon_config.get('allocatorPriority') != expected_balloon_config.allocatorPriority or - existing_balloon_config.get('allocatorTopologyBalancing') != expected_balloon_config.allocatorTopologyBalancing or - existing_balloon_config.get('preferSpreadOnPhysicalCores') != expected_balloon_config.preferSpreadOnPhysicalCores or - existing_balloon_config.get('preferIsolCpus') != expected_balloon_config.preferIsolCpus or - existing_balloon_config.get('preferNewBalloons') != expected_balloon_config.preferNewBalloons or - existing_balloon_config.get('hideHyperthreads') != expected_balloon_config.hideHyperthreads or - existing_balloon_config.get('pinCpus') != expected_balloon_config.pinCpus or - existing_balloon_config.get('pinMemory') != expected_balloon_config.pinMemory or - existing_balloon_config.get('namespaces', []) != expected_balloon_config.namespaces or - existing_balloon_config.get('matchExpressions', []) != expected_balloon_config.matchExpressions or + not reserved_balloon_exists | default(false) or + existing_balloon_config.get('allocatorPriority') != 'high' or + existing_config.config.get('agent', {}).get('nodeResourceTopology') != true or + existing_config.config.get('agent', {}).get('podResourceAPI') != false or + existing_config.config.get('pinCPU') != true or + existing_config.config.get('pinMemory') != false or + existing_config.config.get('allocatorTopologyBalancing') != true or existing_balloon_config.get('preferSpreadOnNuma') is defined or existing_balloon_config.get('preserveCpus') is defined or - existing_balloon_config.get('allocatorPriority', '') == '0' or - existing_balloon_config.get('allocatorPriority', '') | type_debug == 'int' + existing_balloon_config.get('preferSpreadOnPhysicalCores') is defined or + existing_balloon_config.get('preferIsolCpus') is defined or + existing_balloon_config.get('preferNewBalloons') is defined or + existing_balloon_config.get('hideHyperthreads') is defined or + existing_balloon_config.get('pinCpus') is 
defined or + existing_balloon_config.get('pinMemory') is defined or + existing_balloon_config.get('namespaces') is defined ) }} when: vllm_balloon_exists tags: - install -- name: Debug balloon comparison details +- name: Debug simplified balloon validation ansible.builtin.debug: msg: - "===================================================================" - - "Balloon Configuration Comparison Details" + - "Simplified Balloon Configuration Validation" - "===================================================================" - - "Existing balloon configuration:" - - " allocatorPriority: {{ existing_balloon_config.get('allocatorPriority', 'NOT_SET') }}" - - " allocatorTopologyBalancing: {{ existing_balloon_config.get('allocatorTopologyBalancing', 'NOT_SET') }}" - - " preferSpreadOnPhysicalCores: {{ existing_balloon_config.get('preferSpreadOnPhysicalCores', 'NOT_SET') }}" - - " preferIsolCpus: {{ existing_balloon_config.get('preferIsolCpus', 'NOT_SET') }}" - - " preferNewBalloons: {{ existing_balloon_config.get('preferNewBalloons', 'NOT_SET') }}" - - " hideHyperthreads: {{ existing_balloon_config.get('hideHyperthreads', 'NOT_SET') }}" - - " pinCpus: {{ existing_balloon_config.get('pinCpus', 'NOT_SET') }}" - - " pinMemory: {{ existing_balloon_config.get('pinMemory', 'NOT_SET') }}" - - " namespaces: {{ existing_balloon_config.get('namespaces', 'NOT_SET') }}" - - " matchExpressions: {{ existing_balloon_config.get('matchExpressions', 'NOT_SET') }}" + - "Required Balloons:" + - " vllm-balloon exists: {{ 'YES' if vllm_balloon_exists else 'NO' }}" + - " reserved balloon exists: {{ 'YES' if reserved_balloon_exists | default(false) else 'NO' }}" - "" - - "Expected balloon configuration:" - - " allocatorPriority: {{ expected_balloon_config.allocatorPriority }}" - - " allocatorTopologyBalancing: {{ expected_balloon_config.allocatorTopologyBalancing }}" - - " preferSpreadOnPhysicalCores: {{ expected_balloon_config.preferSpreadOnPhysicalCores }}" - - " preferIsolCpus: {{ 
expected_balloon_config.preferIsolCpus }}" - - " preferNewBalloons: {{ expected_balloon_config.preferNewBalloons }}" - - " hideHyperthreads: {{ expected_balloon_config.hideHyperthreads }}" - - " pinCpus: {{ expected_balloon_config.pinCpus }}" - - " pinMemory: {{ expected_balloon_config.pinMemory }}" - - " namespaces: {{ expected_balloon_config.namespaces }}" - - " matchExpressions: {{ expected_balloon_config.matchExpressions }}" + - "Required Global Settings:" + - " agent.nodeResourceTopology: {{ existing_config.config.get('agent', {}).get('nodeResourceTopology', 'NOT_SET') }} (need: true)" + - " agent.podResourceAPI: {{ existing_config.config.get('agent', {}).get('podResourceAPI', 'NOT_SET') }} (need: false)" + - " pinCPU: {{ existing_config.config.get('pinCPU', 'NOT_SET') }} (need: true)" + - " pinMemory: {{ existing_config.config.get('pinMemory', 'NOT_SET') }} (need: false)" + - " allocatorTopologyBalancing: {{ existing_config.config.get('allocatorTopologyBalancing', 'NOT_SET') }} (need: true)" - "" - - "Note: CPU resource allocation now handled in Helm chart resources section" - - "Deprecated fields present in existing config:" - - " preferSpreadOnNuma: {{ 'YES' if existing_balloon_config.get('preferSpreadOnNuma') is defined else 'NO' }}" - - " preserveCpus: {{ 'YES' if existing_balloon_config.get('preserveCpus') is defined else 'NO' }}" - - "Global configuration issues:" - - " preserve section missing: {{ 'YES' if existing_config.config.get('preserve') is not defined else 'NO' }}" - - " global pinMemory incorrect: {{ 'YES' if existing_config.config.get('pinMemory', true) != false else 'NO' }}" - - "Louie: Need to allocate cpu with sub-NUMA awareness : set node-granularity : 'numa' , allocated-full-node: true and set prefer-nodes: [0,1] " + - "vllm-balloon Settings:" + - " allocatorPriority: {{ existing_balloon_config.get('allocatorPriority', 'NOT_SET') }} (need: high)" + - "" + - "OLD per-balloon fields detected (should NOT exist):" + - " preferSpreadOnNuma: 
{{ 'YES (NEEDS UPDATE)' if existing_balloon_config.get('preferSpreadOnNuma') is defined else 'NO (OK)' }}" + - " preserveCpus: {{ 'YES (NEEDS UPDATE)' if existing_balloon_config.get('preserveCpus') is defined else 'NO (OK)' }}" + - " preferSpreadOnPhysicalCores: {{ 'YES (NEEDS UPDATE)' if existing_balloon_config.get('preferSpreadOnPhysicalCores') is defined else 'NO (OK)' }}" + - " preferIsolCpus: {{ 'YES (NEEDS UPDATE)' if existing_balloon_config.get('preferIsolCpus') is defined else 'NO (OK)' }}" + - " preferNewBalloons: {{ 'YES (NEEDS UPDATE)' if existing_balloon_config.get('preferNewBalloons') is defined else 'NO (OK)' }}" + - " hideHyperthreads: {{ 'YES (NEEDS UPDATE)' if existing_balloon_config.get('hideHyperthreads') is defined else 'NO (OK)' }}" + - " pinCpus: {{ 'YES (NEEDS UPDATE)' if existing_balloon_config.get('pinCpus') is defined else 'NO (OK)' }}" + - " pinMemory: {{ 'YES (NEEDS UPDATE)' if existing_balloon_config.get('pinMemory') is defined else 'NO (OK)' }}" + - " namespaces: {{ 'YES (NEEDS UPDATE)' if existing_balloon_config.get('namespaces') is defined else 'NO (OK)' }}" + - "" + - "Update needed: {{ 'YES' if balloon_needs_update | default(false) else 'NO' }}" - "===================================================================" when: vllm_balloon_exists tags: @@ -269,7 +271,6 @@ vars: balloon_name: "{{ generic_balloon.name }}" match_expression: "{{ generic_balloon.match_expression }}" - preserve_existing: "{{ generic_balloon.preserve_existing }}" prefer_isol_cpus: "{{ generic_balloon.policy_settings.preferIsolCpus }}" prefer_new_balloons: "{{ generic_balloon.policy_settings.preferNewBalloons }}" hide_hyperthreads: "{{ generic_balloon.policy_settings.hideHyperthreads }}" @@ -324,72 +325,23 @@ tags: - install -- name: Initialize base configuration if none exists - ansible.builtin.set_fact: - base_config: - config: - reservedResources: - cpu: 1000m - allocatorTopologyBalancing: true - reservedPoolNamespaces: - - kube-system - - ingress-nginx - - 
keycloak - - monitoring - preserve: - matchExpressions: - - key: name - operator: NotIn - values: - - vllm - balloonTypes: [] - instrumentation: - httpEndpoint: ":8891" - prometheusExport: "{{ enable_prometheus | default(false) }}" - reportPeriod: 60s - log: - source: true - debug: - - policy - # Global pin settings - pinMemory: false - when: - - not vllm_balloon_exists or balloon_needs_update | default(false) - - existing_config == {} - tags: - - install - -- name: Use existing configuration as base - ansible.builtin.set_fact: - base_config: "{{ existing_config }}" - when: - - not vllm_balloon_exists or balloon_needs_update | default(false) - - existing_config != {} +- name: Debug decision state before applying template configuration + ansible.builtin.debug: + msg: + - "existing_config_empty={{ existing_config == {} }}" + - "existing_config_keys={{ existing_config.keys() | default([]) }}" + - "vllm_balloon_exists={{ vllm_balloon_exists }}" + - "balloon_needs_update={{ balloon_needs_update | default(false) }}" + - "nri_reserved_cpu_list={{ nri_reserved_cpu_list }}" + - "enable_prometheus={{ enable_prometheus | default(false) }}" + - "Using simplified balloon policy from template" tags: - install -- name: Remove existing balloon if updating - ansible.builtin.set_fact: - balloons_without_old: >- - {{ - base_config.config.balloonTypes | default([]) - | rejectattr('name', 'equalto', generic_balloon.name) - | list - }} - when: - - not vllm_balloon_exists or balloon_needs_update | default(false) - - balloon_needs_update | default(false) - tags: - - install -- name: Add generic balloon type to configuration (new or updated) +- name: Use complete template configuration directly ansible.builtin.set_fact: - updated_config: >- - {%- if balloon_needs_update | default(false) -%} - {{ base_config | combine({'config': base_config.config | combine({'balloonTypes': balloons_without_old + [new_balloon_config.config.balloonTypes[0]]})}, recursive=True) }} - {%- else -%} - {{ 
base_config | combine({'config': base_config.config | combine({'balloonTypes': base_config.config.balloonTypes + [new_balloon_config.config.balloonTypes[0]]})}, recursive=True) }} - {%- endif -%} + updated_config: "{{ new_balloon_config }}" when: not vllm_balloon_exists or balloon_needs_update | default(false) tags: - install diff --git a/core/roles/nri_cpu_balloons/templates/generic-balloon-values.yaml.j2 b/core/roles/nri_cpu_balloons/templates/generic-balloon-values.yaml.j2 index 798377a4..039d8798 100644 --- a/core/roles/nri_cpu_balloons/templates/generic-balloon-values.yaml.j2 +++ b/core/roles/nri_cpu_balloons/templates/generic-balloon-values.yaml.j2 @@ -1,44 +1,30 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Generic NRI Balloon Policy Template for vLLM Models (Simplified) config: - reservedResources: - cpu: 1000m + agent: + nodeResourceTopology: true + podResourceAPI: false allocatorTopologyBalancing: true - reservedPoolNamespaces: - - kube-system - - ingress-nginx - - keycloak - - monitoring - preserve: - matchExpressions: - - key: name - operator: NotIn - values: - - vllm + pinCPU: true + pinMemory: false + reservedResources: + cpu: "cpuset:{{ nri_reserved_cpu_list }}" balloonTypes: - name: "{{ balloon_name }}" - namespaces: - - default matchExpressions: - key: "{{ match_expression.split('=')[0] }}" operator: In values: - "{{ match_expression.split('=')[1] }}" + minCPUs: {{ workload_cpus }} + maxCPUs: 1024 allocatorPriority: "high" - allocatorTopologyBalancing: true - # CPU resource allocation handled in Helm chart values.yaml resources section - preferSpreadOnPhysicalCores: false - preferIsolCpus: {{ prefer_isol_cpus | lower }} - preferNewBalloons: {{ prefer_new_balloons | lower }} - hideHyperthreads: false - pinCpus: true - pinMemory: false - instrumentation: - httpEndpoint: ":8891" - prometheusExport: {{ prometheus_enabled | lower }} - reportPeriod: 60s + - name: 
"reserved" + allocatorPriority: "high" + namespaces: + - "*" + log: + source: true + debug: diff --git a/core/roles/utils/files/compute_reserved_cpus.sh b/core/roles/utils/files/compute_reserved_cpus.sh new file mode 100644 index 00000000..10377a98 --- /dev/null +++ b/core/roles/utils/files/compute_reserved_cpus.sh @@ -0,0 +1,71 @@ +#!/bin/bash +set -euo pipefail + +TOTAL_RESERVED=$1 +if [ -z "$TOTAL_RESERVED" ] || [ "$TOTAL_RESERVED" -le 0 ]; then + echo "Error: missing or invalid TOTAL_RESERVED argument (must be > 0)" >&2 + exit 1 +fi + +total_numa=$(lscpu | awk -F: '/NUMA node\(s\):/{print $2}' | tr -d ' ') +if [ -z "$total_numa" ]; then + echo "Error: failed to detect NUMA nodes from lscpu" >&2 + exit 1 +fi + +threads_per_core=$(lscpu | awk '/Thread.*per core:/{print $4}') +[ -n "$threads_per_core" ] || threads_per_core=1 + +ht_enabled=false +[ "$threads_per_core" -eq 2 ] && ht_enabled=true + +cpus_per_numa=$(( (TOTAL_RESERVED + total_numa - 1) / total_numa )) +out="" + +for i in $(seq 0 $((total_numa - 1))); do + line=$(lscpu | grep "NUMA node$i CPU" | cut -d: -f2 | xargs) + if [ -z "$line" ]; then + echo "Error: failed to read NUMA node$i CPU list from lscpu" >&2 + exit 1 + fi + IFS=',' read -ra segments <<< "$line" + declare -a all_cpus=() + for seg in "${segments[@]}"; do + seg=$(echo "$seg" | xargs) + if [[ "$seg" == *"-"* ]]; then + IFS='-' read -r start end <<< "$seg" + for ((c=start; c<=end; c++)); do all_cpus+=("$c"); done + else + all_cpus+=("$seg") + fi + done + IFS=$'\n' sorted=($(printf '%s\n' "${all_cpus[@]}" | sort -n)) + unset IFS + total_cpus_in_numa=${#sorted[@]} + if [ "$ht_enabled" = true ] && [ "$total_cpus_in_numa" -ge "$((cpus_per_numa * 2))" ]; then + half=$((total_cpus_in_numa / 2)) + physical_half=("${sorted[@]:0:$half}") + ht_half=("${sorted[@]:$half:$half}") + reserve_from_physical=$(( (cpus_per_numa + 1) / 2 )) + reserve_from_ht=$(( cpus_per_numa - reserve_from_physical )) + selected=() + for ((j=0; j<reserve_from_physical; j++)); do selected+=("${physical_half[$j]}"); done + for ((j=0; j<reserve_from_ht; j++)); do selected+=("${ht_half[$j]}"); done + else + selected=("${sorted[@]:0:$cpus_per_numa}") + fi + for c in "${selected[@]}"; do out+="${c},"; done +done + +out="${out%,}" + +if [ -z "$out" ]; then + echo "Error: failed to compute reserved CPU list" >&2 + exit 1 +fi +echo 
"NRI_RESERVED_CPU_LIST=$out" diff --git a/core/roles/utils/tasks/get_optimized_cpu_topology.yaml b/core/roles/utils/tasks/get_optimized_cpu_topology.yaml index c10f0b79..f1b6111e 100644 --- a/core/roles/utils/tasks/get_optimized_cpu_topology.yaml +++ b/core/roles/utils/tasks/get_optimized_cpu_topology.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- @@ -28,17 +28,9 @@ requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: role + - key: ei-inference-eligible operator: In - values: ["inference"] - - matchExpressions: - - key: role - operator: In - values: ["inference-xeon"] - - matchExpressions: - - key: role - operator: In - values: ["inference-cpu"] + values: ["true"] tolerations: - operator: Exists # Allow scheduling on any node including master securityContext: @@ -225,6 +217,17 @@ tags: - install +- name: Refresh node-topology-optimizer pod name (avoid stale pod references) + ansible.builtin.command: > + kubectl -n kube-system get pods -l app=node-topology-optimizer + -o jsonpath='{.items[0].metadata.name}' + register: optimizer_pod_name_refresh + changed_when: false + +- name: Set refreshed optimizer_pod_name + ansible.builtin.set_fact: + optimizer_pod_name: "{{ optimizer_pod_name_refresh.stdout | trim }}" + - name: Get detailed CPU information kubernetes.core.k8s_exec: pod: "{{ optimizer_pod_name }}" @@ -277,7 +280,7 @@ for s in $(seq 0 $((total_sockets - 1))); do socket_totals[$s]=0; done; - + for i in $(seq 0 $((total_numa - 1))); do numa_cpus=$(lscpu | grep "NUMA node$i CPU(s):" | cut -d: -f2 | xargs); if [[ "$numa_cpus" != "" ]]; then @@ -312,6 +315,43 @@ tags: - install +- name: Read CPU reservation script content + ansible.builtin.set_fact: + cpu_script_content: "{{ lookup('file', role_path + '/files/compute_reserved_cpus.sh') }}" + tags: + - install + +- name: Execute CPU reservation computation directly from 
script file + ansible.builtin.shell: + cmd: echo {{ cpu_script_content | quote }} | kubectl exec -n kube-system {{ optimizer_pod_name | quote }} -c ubuntu -i -- bash -s {{ nri_total_reserved_cpus | default(8) | int }} + register: computed_reserved_cpu_list + changed_when: false + tags: + - install + +- name: Set nri_reserved_cpu_list fact + ansible.builtin.set_fact: + nri_reserved_cpu_list: >- + {{ + (computed_reserved_cpu_list.stdout | default('') | trim) + | regex_replace('^.*NRI_RESERVED_CPU_LIST=', '') + | regex_replace('\\s.*$', '') + | regex_replace(',+$', '') + }} + +- name: Assert nri_reserved_cpu_list computed + ansible.builtin.assert: + that: + - nri_reserved_cpu_list | length > 0 + fail_msg: >- + Failed to compute nri_reserved_cpu_list. + stdout={{ computed_reserved_cpu_list.stdout | default('') }} + stderr={{ computed_reserved_cpu_list.stderr | default('') }} + +- name: Debug final nri_reserved_cpu_list + ansible.builtin.debug: + msg: "nri_reserved_cpu_list={{ nri_reserved_cpu_list }}" + - name: Parse accurate CPU topology results ansible.builtin.set_fact: numa_cpu_counts: >- @@ -526,6 +566,22 @@ tags: - install +- name: Calculate workload CPUs + ansible.builtin.set_fact: + workload_cpus: >- + {%- set total_cpus = (cpus_per_socket | int) * (total_sockets.stdout | int) -%} + {%- set reserved_cpu_count = nri_reserved_cpu_list.split(',') | length -%} + {%- if reserved_cpu_count >= total_cpus -%}{%- set reserved_cpu_count = total_cpus -%}{%- endif -%} + {{ total_cpus - reserved_cpu_count }} + tags: + - install + +- name: Add workload_cpus to balloon configuration + ansible.builtin.set_fact: + optimal_balloon_config: "{{ optimal_balloon_config | combine({'workload_cpus': workload_cpus | int}) }}" + tags: + - install + - name: Check for AMX support kubernetes.core.k8s_exec: pod: "{{ optimizer_pod_name }}" @@ -573,8 +629,8 @@ {% endfor %} === CPU Features === - AMX: {{ 'SUPPORTED' if amx_check.rc == 0 else 'NOT AVAILABLE' }} - AVX-512: {{ 'SUPPORTED' if 
avx512_check.rc == 0 else 'NOT AVAILABLE' }} + AMX: {{ 'SUPPORTED' if (amx_check is defined and amx_check.rc is defined and amx_check.rc == 0) else 'NOT AVAILABLE' }} + AVX-512: {{ 'SUPPORTED' if (avx512_check is defined and avx512_check.rc is defined and avx512_check.rc == 0) else 'NOT AVAILABLE' }} === Parallelism Strategy === Strategy: {{ optimal_balloon_config.strategy }} diff --git a/core/scripts/gaudi-firmware-driver-updater.sh b/core/scripts/gaudi-firmware-driver-updater.sh index 6fb50a33..536dc0ed 100644 --- a/core/scripts/gaudi-firmware-driver-updater.sh +++ b/core/scripts/gaudi-firmware-driver-updater.sh @@ -6,7 +6,7 @@ YELLOW=$(tput setaf 3) BLUE=$(tput setaf 4) NC=$(tput sgr0) # Reset color -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Permission is granted for recipient to internally use and modify this software for purposes of benchmarking and testing on Intel architectures. # This software is provided "AS IS" possibly with faults, bugs or errors; it is not intended for production use, and recipient uses this design at their own risk with no liability to Intel. 
diff --git a/core/scripts/generate-vault-secrets.sh b/core/scripts/generate-vault-secrets.sh index 0337903e..24de3dce 100644 --- a/core/scripts/generate-vault-secrets.sh +++ b/core/scripts/generate-vault-secrets.sh @@ -33,6 +33,8 @@ MINIO_SECRET=$(generate_password 20) POSTGRES_USER="postgres" POSTGRES_PASSWORD=$(generate_password 20) GRAFANA_ADMIN_PASSWORD=$(generate_password 20) +AGENTICAI_POSTGRES_PASSWORD=$(generate_password 20) +AGENTICAI_REDIS_PASSWORD=$(generate_password 20) # Generate connection strings DATABASE_URL="postgresql://admin:${POSTGRESQL_PASSWORD}@genai-gateway-postgresql:5432/litellm" @@ -67,6 +69,8 @@ minio_user: "$MINIO_USER" postgres_user: "$POSTGRES_USER" postgres_password: "$POSTGRES_PASSWORD" grafana_admin_password: "$GRAFANA_ADMIN_PASSWORD" +agenticai_postgres_password: "$AGENTICAI_POSTGRES_PASSWORD" +agenticai_redis_password: "$AGENTICAI_REDIS_PASSWORD" EOF # Set appropriate permissions diff --git a/core/scripts/keycloak-fetch-client-secret.sh b/core/scripts/keycloak-fetch-client-secret.sh index 6d7efba9..bbd3427c 100644 --- a/core/scripts/keycloak-fetch-client-secret.sh +++ b/core/scripts/keycloak-fetch-client-secret.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Permission is granted for recipient to internally use and modify this software for purposes of benchmarking and testing on Intel architectures. # This software is provided "AS IS" possibly with faults, bugs or errors; it is not intended for production use, and recipient uses this design at their own risk with no liability to Intel. 
diff --git a/core/scripts/keycloak-realmcreation.sh b/core/scripts/keycloak-realmcreation.sh index 4330b07c..87ed0b67 100644 --- a/core/scripts/keycloak-realmcreation.sh +++ b/core/scripts/keycloak-realmcreation.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Permission is granted for recipient to internally use and modify this software for purposes of benchmarking and testing on Intel architectures. # This software is provided "AS IS" possibly with faults, bugs or errors; it is not intended for production use, and recipient uses this design at their own risk with no liability to Intel. diff --git a/core/scripts/vllm-quickstart/README.md b/core/scripts/vllm-quickstart/README.md new file mode 100644 index 00000000..7dba3473 --- /dev/null +++ b/core/scripts/vllm-quickstart/README.md @@ -0,0 +1,194 @@ +## 📋 Overview + +The `vllm-model-runner.sh` launcher script simplifies the deployment of popular open-source LLMs with optimized configurations for CPU-based inference. It handles dependency installation, hardware detection, Docker container management, and health monitoring automatically. 
+ +## ✨ Features + +- **One-Command Deployment** — Interactive model selection and automated setup +- **Multi-Model Support** — Pre-configured profiles for popular LLMs +- **Custom Port Configuration** — Run the server on any port with `-p` option +- **Hardware Auto-Detection** — Automatically configures tensor/pipeline parallelism based on NUMA topology +- **Dependency Management** — Installs Docker, jq, curl, and git if missing +- **Container Lifecycle Management** — Gracefully handles existing containers +- **Health Monitoring** — Built-in health checks with detailed logging +- **Tool Calling Support** — Pre-configured for function/tool calling capabilities + +## 📦 Prerequisites + +- **Operating System**: Ubuntu +- **HuggingFace Token**: Required for downloading models +- **Sudo Access**: Required for dependency installation +- **Hardware**: CPU with sufficient RAM for model inference + +> **Note:** The script will automatically install Docker, jq, curl, and git if they are not present. + +## 🛠️ Installation + +1. **Set your HuggingFace token:** + ```bash + export HFToken="your_huggingface_token_here" + ``` + +2. **Make the script executable:** + ```bash + chmod +x vllm-model-runner.sh + ``` + +## 🎯 Usage + +### Quick Start + +```bash +./vllm-model-runner.sh +``` + +To run on a custom port: + +```bash +./vllm-model-runner.sh -p 8080 +# or +./vllm-model-runner.sh --port 8080 +``` + +The script will: +1. Check and install any missing dependencies +2. Validate your environment and HuggingFace token +3. Display available models for selection +4. Detect hardware configuration for optimal parallelism +5. Pull the vLLM Docker image (if not cached) +6. Start the vLLM server container +7. Perform health checks until the server is ready + +### Example Session + +``` +[INFO] Starting vLLM Model Launcher +[INFO] Server will run on port: 8000 +[INFO] Checking and installing prerequisites... 
+[SUCCESS] All prerequisites are satisfied + +Available Models: + + 1) Llama 3.1 8B Instruct + 2) Qwen 3 14B + 3) Mistral 7B Instruct v0.3 + +Enter the number of the model you want to start: +> 1 + +[INFO] User selected model: llama-8B +[INFO] Starting vLLM container for model: llama-8B +[SUCCESS] vLLM server is running successfully at http://localhost:8000/health +✅ vLLM server is running successfully at http://localhost:8000/health +``` + +### API Endpoints + +Once running, the vLLM server exposes an OpenAI-compatible API (replace `8000` with your custom port if specified): + +| Endpoint | Description | +|----------|-------------| +| `http://localhost:8000/health` | Health check endpoint | +| `http://localhost:8000/v1/chat/completions` | Chat completions API | +| `http://localhost:8000/v1/completions` | Text completions API | +| `http://localhost:8000/v1/models` | List available models | + +### Example API Call + +```bash +curl http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "meta-llama/Llama-3.1-8B-Instruct", + "messages": [ + {"role": "user", "content": "Hello, how are you?"} + ], + "max_tokens": 100 + }' +``` + +## ⚙️ Configuration + +### models.json Structure + +The `models.json` file contains all configuration: + +```json +{ + "docker": { + "image": "public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.11.2", + "port": "8000:8000", + "environment": { ... }, + "volumes": [ ... ] + }, + "global_defaults": { + "block_size": 128, + "dtype": "bfloat16", + "trust_remote_code": true, + ... + }, + "models": { + "model-key": { + "display_name": "Human Readable Name", + "model_path": "org/model-name", + "vllm_args": { ... 
} + } + } +} +``` + +### Adding a New Model + +Add a new entry under the `models` section in `models.json`: + +```json +"my-model": { + "display_name": "My Custom Model", + "model_path": "organization/model-name", + "vllm_args": { + "max_model_len": 8192, + "tool_call_parser": "hermes" + } +} +``` + +## 📁 Project Structure + +``` +. +├── README.md # This file +├── models.json # Model configurations and Docker settings +└── vllm-model-runner.sh # Main launcher script +``` + +## 🔧 Troubleshooting + +### View Logs + +```bash +# Startup logs +cat /tmp/vllm-startup.log + +# Container logs +docker logs vllm-container + +# Follow container logs in real-time +docker logs -f vllm-container +``` + +### Common Issues + +| Issue | Solution | +|-------|----------| +| `HFToken is not set` | Export your HuggingFace token: `export HFToken="hf_..."` | +| `Docker daemon not running` | Start Docker: `sudo systemctl start docker` | +| `Permission denied` | Add user to docker group: `sudo usermod -aG docker $USER` then logout/login | +| `Container keeps stopping` | Check logs: `docker logs vllm-container` — usually indicates insufficient memory | +| `Health check timeout` | Model loading can take several minutes; check logs for progress | + +### Stop the Server + +```bash +docker stop vllm-container +docker rm vllm-container +``` diff --git a/core/scripts/vllm-quickstart/models.json b/core/scripts/vllm-quickstart/models.json new file mode 100644 index 00000000..e7e24eff --- /dev/null +++ b/core/scripts/vllm-quickstart/models.json @@ -0,0 +1,58 @@ +{ + "docker": { + "image": "public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.11.2", + "port": "8000:8000", + "environment": { + "VLLM_CPU_SGL_KERNEL": "1", + "VLLM_CPU_KVCACHE_SPACE": "40", + "VLLM_RPC_TIMEOUT": "100000", + "VLLM_ALLOW_LONG_MAX_MODEL_LEN": "1", + "VLLM_ENGINE_ITERATION_TIMEOUT_S": "120", + "VLLM_CPU_NUM_OF_RESERVED_CPU": "0" + }, + "volumes": [ + "/root/.cache/huggingface:/root/.cache/huggingface", + 
"/opt/vllm/examples:/workspace/examples" + ] + }, + "global_defaults": { + "block_size": 128, + "dtype": "bfloat16", + "distributed_executor_backend": "mp", + "trust_remote_code": true, + "enable_chunked_prefill": true, + "enforce_eager": true, + "max_num_batched_tokens": 2048, + "max_num_seqs": 256, + "disable_log_requests": true, + "enable_auto_tool_choice": true + }, + "models": { + "llama-8B": { + "display_name": "Llama 3.1 8B Instruct", + "model_path": "meta-llama/Llama-3.1-8B-Instruct", + "vllm_args": { + "max_model_len": 32768, + "tool_call_parser": "llama3_json", + "chat_template": "examples/tool_chat_template_llama3.1_json.jinja" + } + }, + "qwen-14B": { + "display_name": "Qwen 3 14B", + "model_path": "Qwen/Qwen3-14B", + "vllm_args": { + "max_model_len": 16384, + "tool_call_parser": "hermes" + } + }, + "mistral-7B": { + "display_name": "Mistral 7B Instruct v0.3", + "model_path": "mistralai/Mistral-7B-Instruct-v0.3", + "vllm_args": { + "max_model_len": 32768, + "tool_call_parser": "mistral", + "chat_template": "examples/tool_chat_template_mistral_parallel.jinja" + } + } + } +} \ No newline at end of file diff --git a/core/scripts/vllm-quickstart/vllm-model-runner.sh b/core/scripts/vllm-quickstart/vllm-model-runner.sh new file mode 100755 index 00000000..881cf02d --- /dev/null +++ b/core/scripts/vllm-quickstart/vllm-model-runner.sh @@ -0,0 +1,833 @@ +#!/bin/bash + +# Copyright (C) 2024-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +#============================================================================= +# vLLM Model Launcher +# A modular script to launch different LLM models with vLLM +#============================================================================= + +# Configuration +readonly CONFIG_FILE="models.json" +readonly LOG_FILE="/tmp/vllm-startup.log" +readonly CONTAINER_NAME="vllm-container" + +# Port configuration (can be overridden via command line) +PORT="8000" +HEALTHCHECK_URL="http://localhost:${PORT}/health" + +# 
Colors for output +readonly RED='\033[0;31m' +readonly GREEN='\033[0;32m' +readonly BLUE='\033[0;34m' +readonly YELLOW='\033[1;33m' +readonly NC='\033[0m' + +#============================================================================= +# ARGUMENT PARSING +#============================================================================= + +# Display usage information +show_usage() { + echo "Usage: $0 [OPTIONS]" + echo "" + echo "Options:" + echo " -p, --port PORT Port to run the vLLM server on (default: 8000)" + echo " -h, --help Display this help message" + echo "" + echo "Examples:" + echo " $0 # Start vLLM on default port 8000" + echo " $0 -p 8080 # Start vLLM on port 8080" + echo " $0 --port 9000 # Start vLLM on port 9000" +} + +# Parse command line arguments +parse_arguments() { + while [[ $# -gt 0 ]]; do + case "$1" in + -p|--port) + if [[ -z "$2" || "$2" == -* ]]; then + echo "Error: --port requires a port number" + show_usage + exit 1 + fi + # Validate port number + if ! [[ "$2" =~ ^[0-9]+$ ]] || (( $2 < 1 || $2 > 65535 )); then + echo "Error: Invalid port number '$2'. Must be between 1 and 65535." 
+ exit 1 + fi + PORT="$2" + shift 2 + ;; + -h|--help) + show_usage + exit 0 + ;; + *) + echo "Error: Unknown option '$1'" + show_usage + exit 1 + ;; + esac + done + + # Update HEALTHCHECK_URL with the configured port + HEALTHCHECK_URL="http://localhost:${PORT}/health" +} + +#============================================================================= +# UTILITY FUNCTIONS +#============================================================================= + +# Logging function with levels and colors +log() { + local level="$1" + local message="$2" + local color="$NC" + + case "$level" in + INFO) color="$BLUE" ;; + SUCCESS) color="$GREEN" ;; + ERROR) color="$RED" ;; + WARN) color="$YELLOW" ;; + esac + + # Use echo instead of printf and redirect to ensure clean output + echo -e "${color}[${level}]${NC} ${message}" + echo "$(date '+%Y-%m-%d %H:%M:%S') - [$level] $message" >> "$LOG_FILE" +} + +# Clean exit function +cleanup_and_exit() { + local exit_code="${1:-1}" + local message="${2:-Script terminated}" + + if [[ "$exit_code" -ne 0 ]]; then + log "ERROR" "$message" + printf "${RED}❌ %s${NC}\n" "$message" + printf "${YELLOW}Check %s for detailed logs.${NC}\n" "$LOG_FILE" + fi + + exit "$exit_code" +} + +#============================================================================= +# CONFIGURATION FUNCTIONS +#============================================================================= + +# Install Docker following official Ubuntu installation guide +install_docker() { + log "INFO" "Installing Docker..." 
+ + # Update package index + sudo apt-get update -y >/dev/null 2>&1 + + # Install prerequisite packages + sudo apt-get install -y ca-certificates curl gnupg lsb-release >/dev/null 2>&1 + + # Add Docker's official GPG key + sudo mkdir -p /etc/apt/keyrings + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg >/dev/null 2>&1 + + # Set up the Docker repository + echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list >/dev/null + + # Update package index with Docker repository + sudo apt-get update -y >/dev/null 2>&1 + + # Install Docker Engine + sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin >/dev/null 2>&1 + + # Add current user to docker group to run docker without sudo + sudo usermod -aG docker $USER + + # Start and enable Docker service + sudo systemctl start docker + sudo systemctl enable docker + + log "SUCCESS" "Docker installed successfully" + log "WARN" "Please log out and log back in for Docker group membership to take effect" + log "WARN" "Or run: newgrp docker" +} + +# Install jq JSON processor +install_jq() { + log "INFO" "Installing jq..." + sudo apt-get update -y >/dev/null 2>&1 + sudo apt-get install -y jq >/dev/null 2>&1 + log "SUCCESS" "jq installed successfully" +} + +# Install curl if missing +install_curl() { + log "INFO" "Installing curl..." + sudo apt-get update -y >/dev/null 2>&1 + sudo apt-get install -y curl >/dev/null 2>&1 + log "SUCCESS" "curl installed successfully" +} + +# Install git if missing +install_git() { + log "INFO" "Installing git..." 
+ sudo apt-get update -y >/dev/null 2>&1 + sudo apt-get install -y git >/dev/null 2>&1 + log "SUCCESS" "git installed successfully" +} + +# Clone vLLM repository to /opt/vllm +clone_vllm_repository() { + local vllm_path="/opt/vllm" + + log "INFO" "Setting up vLLM repository at $vllm_path..." + + # Check if directory already exists and has content + if [[ -d "$vllm_path" && -n "$(sudo ls -A "$vllm_path" 2>/dev/null)" ]]; then + log "INFO" "vLLM repository already exists at $vllm_path" + + # Check if it's a git repository and has the examples directory + if sudo test -d "$vllm_path/.git" && sudo test -d "$vllm_path/examples"; then + log "INFO" "Updating existing vLLM repository..." + if sudo git -C "$vllm_path" pull origin main >> "$LOG_FILE" 2>&1; then + log "SUCCESS" "vLLM repository updated successfully" + return 0 + else + log "WARN" "Failed to update vLLM repository, re-cloning..." + sudo rm -rf "$vllm_path" + clone_vllm_repo_fresh "$vllm_path" + fi + else + log "WARN" "Directory exists but is not a proper vLLM repository, removing and re-cloning..." + sudo rm -rf "$vllm_path" + clone_vllm_repo_fresh "$vllm_path" + fi + else + clone_vllm_repo_fresh "$vllm_path" + fi + + # Final verification that examples directory exists + if ! sudo test -d "$vllm_path/examples"; then + log "ERROR" "vLLM examples directory not found after cloning" + return 1 + fi + + log "SUCCESS" "vLLM repository setup completed" + return 0 +} + +# Helper function to clone fresh repository +clone_vllm_repo_fresh() { + local vllm_path="$1" + + log "INFO" "Cloning vLLM repository from GitHub..." 
+ + # Create parent directory if it doesn't exist + sudo mkdir -p "$(dirname "$vllm_path")" 2>/dev/null || true + + # Clone the repository + if sudo git clone --depth 1 https://github.com/vllm-project/vllm.git "$vllm_path" >> "$LOG_FILE" 2>&1; then + log "SUCCESS" "vLLM repository cloned successfully" + + # Set proper permissions for better access (optional, but helpful for debugging) + sudo chown -R root:root "$vllm_path" + sudo chmod -R 755 "$vllm_path" + else + log "ERROR" "Failed to clone vLLM repository" + return 1 + fi +} + +# Check and install required dependencies +install_dependencies() { + log "INFO" "Checking and installing prerequisites..." + + local need_newgrp=false + local need_rerun=false + + # Check if we have sudo privileges + if ! sudo -n true 2>/dev/null; then + log "WARN" "This script requires sudo privileges to install dependencies" + log "INFO" "Please run: sudo -v" + read -p "Press Enter after running sudo -v to continue..." + fi + + # Check and install curl first (needed for Docker installation) + if ! command -v curl >/dev/null 2>&1; then + install_curl + fi + + # Check and install git (needed for vLLM repository cloning) + if ! command -v git >/dev/null 2>&1; then + install_git + fi + + # Check and install jq + if ! command -v jq >/dev/null 2>&1; then + install_jq + fi + + # Check and install Docker + if ! command -v docker >/dev/null 2>&1; then + install_docker + need_newgrp=true + need_rerun=true + fi + + # Setup vLLM repository (needed for examples volume mount) + if ! clone_vllm_repository; then + cleanup_and_exit 1 "Failed to setup vLLM repository" + fi # Check if current user is in docker group + + if ! groups | grep -q docker; then + log "WARN" "Current user is not in docker group" + # Check if we can still access Docker with sudo + if sudo docker info >/dev/null 2>&1; then + log "INFO" "Docker accessible with sudo, continuing..." + else + if [[ "$need_newgrp" == "false" ]]; then + log "INFO" "Adding user to docker group..." 
+ sudo usermod -aG docker $USER + need_rerun=true + fi + fi + fi + + # If user was added to docker group, suggest re-running + if [[ "$need_rerun" == "true" ]]; then + log "WARN" "User has been added to docker group" + log "WARN" "Please run: newgrp docker" + log "WARN" "Then re-run this script, or log out and log back in" + exit 1 + fi + + log "SUCCESS" "All prerequisites are satisfied" +} + +# Validate required dependencies +validate_dependencies() { + # First try to install missing dependencies + install_dependencies + + # Then validate that everything is working + local missing_deps=() + + command -v jq >/dev/null || missing_deps+=("jq") + command -v docker >/dev/null || missing_deps+=("docker") + command -v curl >/dev/null || missing_deps+=("curl") + + if [[ ${#missing_deps[@]} -gt 0 ]]; then + cleanup_and_exit 1 "Failed to install dependencies: ${missing_deps[*]}. Please install them manually." + fi +} + +# Validate environment and prerequisites +validate_environment() { + log "INFO" "Validating environment..." + + # Check HuggingFace token + if [[ -z "$HFToken" ]]; then + cleanup_and_exit 1 "HUGGING_FACE_HUB_TOKEN (HFToken) is not set. Please export HFToken before running." + fi + + # Check config file + if [[ ! -f "$CONFIG_FILE" ]]; then + cleanup_and_exit 1 "Configuration file $CONFIG_FILE not found." + fi + + # Validate JSON syntax + if ! jq empty "$CONFIG_FILE" 2>/dev/null; then + cleanup_and_exit 1 "Invalid JSON syntax in $CONFIG_FILE" + fi + + # Check Docker daemon + check_docker_access + if ! ${USE_SUDO}docker info >/dev/null 2>&1; then + cleanup_and_exit 1 "Docker daemon is not running or not accessible." 
+ fi + + log "SUCCESS" "Environment validation completed" +} + +# Global variables for model data +declare -a MODEL_KEYS +declare -g USE_SUDO="" + +# Helper function to determine if we need sudo for Docker +check_docker_access() { + if groups | grep -q docker; then + USE_SUDO="" + else + USE_SUDO="sudo " + log "WARN" "User not in docker group, using sudo for Docker commands" + fi +} + +# Load and parse configuration +load_configuration() { + log "INFO" "Loading configuration from $CONFIG_FILE" + + # Extract model list + local temp_keys + if ! temp_keys=($(jq -r '.models | keys[]' "$CONFIG_FILE" 2>/dev/null)); then + cleanup_and_exit 1 "Failed to parse model keys from configuration" + fi + + if [[ ${#temp_keys[@]} -eq 0 ]]; then + cleanup_and_exit 1 "No models found in configuration" + fi + + # Assign to global array + MODEL_KEYS=("${temp_keys[@]}") + + log "SUCCESS" "Loaded ${#MODEL_KEYS[@]} model configurations" +} + +#============================================================================= +# HARDWARE DETECTION FUNCTIONS +#============================================================================= + +# Compute parallel configuration based on hardware +compute_parallel_config() { + log "INFO" "Detecting hardware configuration..." >&2 + + local total_sockets total_numa_nodes numa_per_socket + + total_sockets=$(lscpu | grep 'Socket(s):' | awk '{print $2}' 2>/dev/null) + total_numa_nodes=$(lscpu | grep 'NUMA node(s):' | awk '{print $3}' 2>/dev/null) + + # Validate hardware detection + if [[ -z "$total_sockets" || -z "$total_numa_nodes" ]]; then + log "WARN" "Could not detect hardware configuration. Using default settings." >&2 + echo "" + return + fi + + if [[ "$total_sockets" -eq 0 ]]; then + log "WARN" "Invalid socket count detected. Using default settings." 
>&2 + echo "" + return + fi + + numa_per_socket=$((total_numa_nodes / total_sockets)) + + local parallel_config="" + case "$numa_per_socket" in + 2|4) + parallel_config="--tensor-parallel-size $numa_per_socket" + log "INFO" "Using tensor parallelism: $numa_per_socket" >&2 + ;; + 3|6) + parallel_config="--pipeline-parallel-size $numa_per_socket" + log "INFO" "Using pipeline parallelism: $numa_per_socket" >&2 + ;; + *) + log "INFO" "No specific parallelism configuration for $numa_per_socket NUMA nodes per socket" >&2 + ;; + esac + + echo "$parallel_config" +} + +#============================================================================= +# USER INTERACTION FUNCTIONS +#============================================================================= + +# Display available models and get user selection +select_model() { + printf "${YELLOW}Available Models:${NC}\n" >&2 + echo >&2 + + for i in "${!MODEL_KEYS[@]}"; do + local model_key="${MODEL_KEYS[$i]}" + local display_name + display_name=$(jq -r ".models.\"$model_key\".display_name" "$CONFIG_FILE") + printf "%2d) %s\n" "$((i+1))" "$display_name" >&2 + done + + echo >&2 + printf "${YELLOW}Enter the number of the model you want to start:${NC}\n" >&2 + read -p "> " choice + + # Validate user input + if ! [[ "$choice" =~ ^[0-9]+$ ]] || (( choice < 1 || choice > ${#MODEL_KEYS[@]} )); then + cleanup_and_exit 1 "Invalid choice: $choice. Please enter a number between 1 and ${#MODEL_KEYS[@]}." 
+ fi + + # Return selected model key + echo "${MODEL_KEYS[$((choice-1))]}" +} + +#============================================================================= +# DOCKER MANAGEMENT FUNCTIONS +#============================================================================= + +# Build vLLM arguments from configuration +build_vllm_args() { + local model_key="$1" + local parallel_config="$2" + + log "INFO" "Building vLLM arguments for model: $model_key" >&2 + + # Get model path + local model_path + model_path=$(jq -r ".models.\"$model_key\".model_path" "$CONFIG_FILE") + + if [[ "$model_path" == "null" || -z "$model_path" ]]; then + cleanup_and_exit 1 "Model path not found for $model_key" + fi + + # Start building arguments + local args="--model $model_path" + + # Add global defaults + while IFS='=' read -r key value; do + if [[ "$value" == "true" ]]; then + args="$args --$key" + elif [[ "$value" != "false" && "$value" != "null" ]]; then + args="$args --$(echo "$key" | tr '_' '-') $value" + fi + done < <(jq -r '.global_defaults | to_entries[] | "\(.key)=\(.value)"' "$CONFIG_FILE") + + # Add model-specific arguments + while IFS='=' read -r key value; do + if [[ "$value" == "true" ]]; then + args="$args --$key" + elif [[ "$value" != "false" && "$value" != "null" ]]; then + args="$args --$(echo "$key" | tr '_' '-') $value" + fi + done < <(jq -r ".models.\"$model_key\".vllm_args | to_entries[] | \"\(.key)=\(.value)\"" "$CONFIG_FILE" 2>/dev/null) + + # Add parallel configuration + if [[ -n "$parallel_config" ]]; then + args="$args $parallel_config" + fi + + echo "$args" +} + +# Stop existing vLLM container +stop_existing_container() { + log "INFO" "Checking for existing vLLM containers..." 
+ + # Check for both running and stopped containers with the same name + local existing_container + existing_container=$(${USE_SUDO}docker ps -aq --filter "name=$CONTAINER_NAME" 2>/dev/null) + + if [[ -n "$existing_container" ]]; then + log "INFO" "Stopping existing container: $existing_container" + + # Stop the container if it's running + if ${USE_SUDO}docker ps -q --filter "name=$CONTAINER_NAME" | grep -q "$existing_container"; then + if ! ${USE_SUDO}docker stop "$existing_container" >> "$LOG_FILE" 2>&1; then + log "WARN" "Failed to stop container gracefully, forcing removal" + ${USE_SUDO}docker kill "$existing_container" >> "$LOG_FILE" 2>&1 + fi + fi + + # Try to remove the container - it might already be gone if started with --rm + if ${USE_SUDO}docker inspect "$existing_container" >/dev/null 2>&1; then + # Container still exists, try to remove it + local retry_count=0 + local max_retries=5 + while [[ $retry_count -lt $max_retries ]]; do + local rm_output + rm_output=$(${USE_SUDO}docker rm "$existing_container" 2>&1) + local rm_exit_code=$? + + if [[ $rm_exit_code -eq 0 ]]; then + log "SUCCESS" "Existing container stopped and removed" + return 0 + elif [[ "$rm_output" == *"No such container"* ]]; then + # Container was removed while we were trying (probably auto-removed with --rm) + log "SUCCESS" "Container was removed automatically" + return 0 + elif [[ "$rm_output" == *"removal of container"*"is already in progress"* ]]; then + # Docker is already removing it, wait a bit longer + log "INFO" "Container removal in progress, waiting..." + sleep 3 + else + # Some other error, log it + echo "$rm_output" >> "$LOG_FILE" + log "WARN" "Attempt $((retry_count + 1))/$max_retries: Failed to remove container, retrying in 2 seconds..." 
+ sleep 2 + fi + + retry_count=$((retry_count + 1)) + done + + log "ERROR" "Failed to remove existing container after $max_retries attempts" + return 1 + else + # Container was already removed (probably had --rm flag) + log "SUCCESS" "Existing container was already removed automatically" + return 0 + fi + else + log "INFO" "No existing containers found" + fi +} + +# Check if Docker image exists locally +check_docker_image_exists() { + local docker_image="$1" + ${USE_SUDO}docker image inspect "$docker_image" >/dev/null 2>&1 +} + +# Pull Docker image if needed +pull_docker_image() { + local docker_image="$1" + + log "INFO" "Checking if Docker image '$docker_image' exists locally..." + + if check_docker_image_exists "$docker_image"; then + log "INFO" "Docker image already exists locally, checking for updates..." + else + log "INFO" "Docker image not found locally, downloading..." + fi + + log "INFO" "Pulling Docker image (this may take several minutes on first run)..." + + # Show progress while pulling + if ! ${USE_SUDO}docker pull "$docker_image" >> "$LOG_FILE" 2>&1; then + log "ERROR" "Failed to pull Docker image: $docker_image" + return 1 + fi + + log "SUCCESS" "Docker image pull completed" + return 0 +} + +# Wait for container to be in running state +wait_for_container_running() { + local max_attempts=60 # Wait up to 60 seconds for container to start + local attempt=1 + + log "INFO" "Waiting for container to enter running state..." + + while [[ $attempt -le $max_attempts ]]; do + # Check if container exists and get its status + local container_status + container_status=$(${USE_SUDO}docker inspect --format='{{.State.Status}}' "$CONTAINER_NAME" 2>/dev/null) + + case "$container_status" in + "running") + log "SUCCESS" "Container is now running" + return 0 + ;; + "created"|"restarting") + log "INFO" "Container is starting (attempt $attempt/$max_attempts)..." + ;; + "exited"|"dead") + log "ERROR" "Container exited unexpectedly. 
Check container logs:" + ${USE_SUDO}docker logs --tail=20 "$CONTAINER_NAME" >> "$LOG_FILE" 2>&1 + return 1 + ;; + "") + log "INFO" "Container not found yet (attempt $attempt/$max_attempts)..." + ;; + *) + log "WARN" "Container in unexpected state: $container_status (attempt $attempt/$max_attempts)" + ;; + esac + + sleep 1 + ((attempt++)) + done + + log "ERROR" "Container did not reach running state within $max_attempts seconds" + return 1 +} + +# Build and execute Docker command +start_vllm_container() { + local model_key="$1" + local vllm_args="$2" + + log "INFO" "Starting vLLM container for model: $model_key" + + # Get Docker image first + local docker_image + docker_image=$(jq -r '.docker.image' "$CONFIG_FILE") + + # Pull the image first to avoid confusion during container start + if ! pull_docker_image "$docker_image"; then + return 1 + fi + + # Build Docker command + local docker_cmd="${USE_SUDO}docker run -d --name $CONTAINER_NAME" + + # Add port mapping (use user-specified PORT, mapping host port to container port 8000) + docker_cmd="$docker_cmd -p ${PORT}:8000" + + # Add environment variables + docker_cmd="$docker_cmd -e HUGGING_FACE_HUB_TOKEN=$HFToken" + while IFS='=' read -r key value; do + docker_cmd="$docker_cmd -e $key=$value" + done < <(jq -r '.docker.environment | to_entries[] | "\(.key)=\(.value)"' "$CONFIG_FILE") + + # Add volume mounts + while read -r volume; do + [[ -n "$volume" ]] && docker_cmd="$docker_cmd -v $volume" + done < <(jq -r '.docker.volumes[]?' "$CONFIG_FILE") + + # Add Docker image and vLLM arguments + docker_cmd="$docker_cmd --ipc=host $docker_image $vllm_args" + + # Log the command for debugging (truncate if too long to avoid console wrapping) + if [[ ${#docker_cmd} -gt 80 ]]; then + log "INFO" "Docker command: ${docker_cmd:0:80}..." + echo "Full Docker command: $docker_cmd" >> "$LOG_FILE" + else + log "INFO" "Docker command: $docker_cmd" + fi + + log "INFO" "Starting container in detached mode..." 
+ + # Execute Docker command and capture the container ID + local container_id + if ! container_id=$(eval "$docker_cmd" 2>> "$LOG_FILE"); then + log "ERROR" "Failed to start Docker container. Check the log file for details." + log "ERROR" "Recent log entries:" + tail -10 "$LOG_FILE" | while read -r line; do + log "ERROR" " $line" + done + return 1 + fi + + log "INFO" "Container created with ID: ${container_id:0:12}..." + + # Wait for container to be in running state + if ! wait_for_container_running; then + log "ERROR" "Container failed to reach running state" + return 1 + fi + + return 0 +} + +#============================================================================= +# HEALTH CHECK FUNCTIONS +#============================================================================= + +# Perform health check on the started service +perform_health_check() { + log "INFO" "Starting health check on $HEALTHCHECK_URL" + log "INFO" "The vLLM server may take a few minutes to initialize and load the model..." + sleep 10 # Allow vLLM container to begin initialization before polling + + local max_attempts=120 # Number of health check attempts to allow for model loading + local attempt=1 + local last_container_status="" + + while [[ $attempt -le $max_attempts ]]; do + # Check container status first + local container_status + container_status=$(${USE_SUDO}docker inspect --format='{{.State.Status}}' "$CONTAINER_NAME" 2>/dev/null) + + if [[ "$container_status" != "$last_container_status" ]]; then + case "$container_status" in + "running") + log "INFO" "Container is running, waiting for vLLM server to initialize..." + ;; + "exited"|"dead") + log "ERROR" "Container has stopped unexpectedly. Checking logs..." 
                    ${USE_SUDO}docker logs --tail=20 "$CONTAINER_NAME" >> "$LOG_FILE" 2>&1
                    return 1
                    ;;
                "")
                    log "ERROR" "Container not found during health check"
                    return 1
                    ;;
            esac
            last_container_status="$container_status"
        fi

        # Only proceed with health check if container is running
        if [[ "$container_status" == "running" ]]; then
            # Show periodic progress
            if (( attempt % 5 == 1 )); then
                log "INFO" "Health check attempt $attempt/$max_attempts - waiting for vLLM server response..."
            fi

            # Perform the actual health check
            local http_code
            http_code=$(curl -s --max-time 5 -w "%{http_code}" -o /dev/null "$HEALTHCHECK_URL" 2>/dev/null)

            if [[ "$http_code" == "200" ]]; then
                log "SUCCESS" "vLLM server is healthy and responding"
                printf "${GREEN}✅ vLLM server is running successfully at %s${NC}\n" "$HEALTHCHECK_URL"
                return 0
            elif [[ -n "$http_code" && "$http_code" != "000" ]]; then
                # We got a response but not 200, show what we got
                if (( attempt % 10 == 1 )); then
                    log "INFO" "Server responding with HTTP $http_code, still initializing..."
                fi
            fi
        fi

        sleep 5
        ((attempt++))
    done

    log "ERROR" "Health check failed after $max_attempts attempts"
    printf "${RED}❌ vLLM server failed to start or is not responding${NC}\n"
    printf "${YELLOW}The server may still be initializing. Check logs with: ${USE_SUDO}docker logs %s${NC}\n" "$CONTAINER_NAME"
    return 1
}

#=============================================================================
# MAIN FUNCTION
#=============================================================================

# Orchestrates the full launch flow: argument parsing, dependency and
# environment validation, configuration loading, hardware detection,
# interactive model selection, container (re)start, and the final health check.
main() {
    # Parse command line arguments first
    parse_arguments "$@"

    # Initialize logging
    : > "$LOG_FILE"
    log "INFO" "Starting vLLM Model Launcher"
    log "INFO" "Server will run on port: $PORT"

    # Validate environment
    validate_dependencies
    validate_environment

    # Load configuration
    load_configuration

    # Hardware detection
    local parallel_config
    parallel_config=$(compute_parallel_config)

    # User interaction
    local selected_model
    selected_model=$(select_model)
    log "INFO" "User selected model: $selected_model"

    # Build configuration
    local vllm_args
    vllm_args=$(build_vllm_args "$selected_model" "$parallel_config")

    # Container management
    if ! stop_existing_container; then
        cleanup_and_exit 1 "Failed to remove existing containers"
    fi

    if ! start_vllm_container "$selected_model" "$vllm_args"; then
        cleanup_and_exit 1 "Failed to start vLLM container"
    fi

    # Health check
    if perform_health_check; then
        log "SUCCESS" "vLLM deployment completed successfully"
        exit 0
    else
        cleanup_and_exit 1 "vLLM deployment failed"
    fi
}

# Script entry point (skipped when the file is sourced rather than executed)
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    main "$@"
fi
diff --git a/docs/README.md b/docs/README.md
index ed9cab23..8c3032da 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -12,6 +12,7 @@ Complete all [prerequisites](./prerequisites.md).
| Deployment Type | Description | |-----------------------------------------|--------------------------------------------------------------| +| **Single Node (vLLM, non‑production)** | For Quick Testing on Intel® Xeon® processors using vLLM Docker ([Guide](../core/scripts/vllm-quickstart/README.md)) | | **Single Node** | Quick start for testing or lightweight workloads ([Guide](./single-node-deployment.md)) | | **Single Master, Multiple Workers** | For higher throughput workloads ([Guide](./inventory-design-guide.md#single-master-multiple-workload-node-deployment)) | | **Multi-Master, Multiple Workers** | Recommended for HA enterprise clusters ([Guide](./inventory-design-guide.md#multi-master-multi-workload-node-deployment)) | diff --git a/docs/cpu-optimization-guide.md b/docs/cpu-optimization-guide.md index 3faadf27..dadb6888 100644 --- a/docs/cpu-optimization-guide.md +++ b/docs/cpu-optimization-guide.md @@ -7,15 +7,19 @@ The system automatically optimizes CPU allocation for AI models using balloon po ## Automatic Features ### CPU Allocation -- System automatically detects available CPU cores -- Reserves 18% of CPUs for system processes -- Allocates remaining CPUs to AI models -- Assigns dedicated CPU cores to each model -### Memory Allocation -- System automatically detects available memory -- Reserves 18% of memory for system processes -- Allocates remaining memory to AI models +**System CPU Reservation**: A total of **8 vCPUs** is reserved for infrastructure components (Keycloak, APISIX, observability, kube-system), distributed evenly across NUMA nodes. 
+ +**Intelligent CPU Selection**: +- Automatically detects NUMA topology and hyperthreading configuration +- For hyperthreaded systems: Balances reservations between physical cores and HT siblings + - Example (48 cores with HT): Reserves from both physical cores (0-23) and HT cores (24-47) +- For non-segmented CPUs (e.g., "0-47"): Creates virtual segments at the midpoint +- For segmented CPUs (e.g., "0-23,48-71"): Uses existing segment boundaries + +**Model CPU Allocation**: +- Remaining CPUs (after reservation) are allocated to LLM models +- Assigns dedicated CPU cores to each model for optimal performance ### Hardware Detection - Automatically detects NUMA topology @@ -37,17 +41,16 @@ labels: resources: requests: cpu: 40 # Automatically calculated - memory: 4G + # for tp1, tp2 system should have minimum 128Gi and for tp>=4 minimum 256Gi memory available for the model's pod + memory: 128Gi ``` -## Recommendations for Single Node Clusters with Limited CPUs +## System Component Deployment Recommendations -For single node clusters (e.g., systems with 48 CPU cores), only Keycloak and APISIX are supported. GenAI Gateway is not supported on these configurations. To deploy GenAI Gateway, a minimum of 96 CPU cores is required. +For single-node Xeon clusters, **Keycloak** and **APISIX** are recommended. + +For Gaudi or large multi-node Xeon clusters, the GenAI Gateway is well-suited. -**Summary:** -- For clusters with limited CPU resources, deploy only Keycloak and APISIX. -- GenAI Gateway deployment requires at least 96 CPU cores. 
- ## Status Verification ### Check System Status diff --git a/docs/examples/multi-node/inference-config.cfg b/docs/examples/multi-node/inference-config.cfg index 69677497..886ece50 100644 --- a/docs/examples/multi-node/inference-config.cfg +++ b/docs/examples/multi-node/inference-config.cfg @@ -17,4 +17,7 @@ deploy_observability=off deploy_llm_models=on deploy_ceph=off deploy_istio=off -uninstall_ceph=off \ No newline at end of file +uninstall_ceph=off + +# Agentic AI Plugin +deploy_agenticai_plugin=off \ No newline at end of file diff --git a/docs/examples/single-node/einf-singlenode-gaudi.yml b/docs/examples/single-node/einf-singlenode-gaudi.yml index 46c3f91d..9500fd45 100644 --- a/docs/examples/single-node/einf-singlenode-gaudi.yml +++ b/docs/examples/single-node/einf-singlenode-gaudi.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Ansible Playbook to install and configure the Enterprise Inference Service on a Single Gaudi node running Ubuntu 22.04+ diff --git a/docs/examples/single-node/einf-singlenode-xeon.yml b/docs/examples/single-node/einf-singlenode-xeon.yml index 2128cc95..54bd4ee9 100644 --- a/docs/examples/single-node/einf-singlenode-xeon.yml +++ b/docs/examples/single-node/einf-singlenode-xeon.yml @@ -1,4 +1,4 @@ -# Copyright (C) 2024-2025 Intel Corporation +# Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # Ansible Playbook to install and configure the Enterprise Inference Service on a Single Xeon node running Ubuntu 22.04+ diff --git a/docs/examples/single-node/inference-config.cfg b/docs/examples/single-node/inference-config.cfg index 6a28b89f..5069a41f 100644 --- a/docs/examples/single-node/inference-config.cfg +++ b/docs/examples/single-node/inference-config.cfg @@ -15,3 +15,6 @@ deploy_llm_models=on deploy_ceph=off deploy_istio=off uninstall_ceph=off + +# Agentic AI Plugin +deploy_agenticai_plugin=off diff --git 
a/docs/getting-started-deploy-MCP.md b/docs/getting-started-deploy-MCP.md new file mode 100644 index 00000000..65723a24 --- /dev/null +++ b/docs/getting-started-deploy-MCP.md @@ -0,0 +1,468 @@ +# Deploying MCP Servers on Enterprise Inference Stack + +--- + +## Overview + +This guide provides step-by-step instructions for deploying a FastMCP (Model Context Protocol) server on the Enterprise Inference Stack. By following this guide, you will create a containerized MCP server and deploy it on your Kubernetes cluster with enterprise-grade security and network accessibility. + +The deployment process includes creating the MCP application, containerizing it with Docker, pushing it to a registry, and deploying it using Helm charts with OIDC-based authentication. + +--- + +## Table of Contents + +1. [Architecture Overview](#architecture-overview) +2. [Prerequisites](#prerequisites) +3. [Implementation](#implementation) +4. [Containerization](#containerization) +5. [Registry and Image Management](#registry-and-image-management) +6. [Helm Chart Configuration](#helm-chart-configuration) +7. [OIDC Authentication Setup](#oidc-authentication-setup) +8. [Deployment](#deployment) +9. [Deploying Pre-Built MCP Server Images](#deploying-pre-built-mcp-server-images) +10. [Troubleshooting](#troubleshooting) +11. [References](#references) + +--- + +## Architecture Overview + +The Enterprise Inference Stack uses a **remote HTTP deployment model** for MCP servers. 
This deployment architecture provides: + +- **Network Accessibility**: Your MCP server is accessible over HTTP/HTTPS endpoints +- **Multi-Client Support**: Multiple client applications can connect simultaneously +- **Enterprise Security**: OIDC-based authentication protects your service +- **Kubernetes-Native**: Runs on the Enterprise Inference Stack's Kubernetes cluster +- **Horizontal Scalability**: Scale seamlessly using Horizontal Pod Autoscaler (HPA) based on CPU and memory utilization + +### Transport Protocol + +- **Transport Type**: HTTP/HTTPS (Remote Deployment) +- **Default Port**: 8000 +- **Base Path**: `/demo/mcp` +- **Health Check Endpoint**: `/health` +- **Protocol Model**: Streaming HTTP + +## Prerequisites + +Ensure you have the following prerequisites in place: + +### Required Infrastructure + +- Enterprise Inference Stack (configured with `deploy_keycloak_apisix=on` for OIDC authentication support) +- Docker runtime for image building and local testing +- Helm 3.x for chart-based deployments +- Container registry (private or public) for image storage + +### Required Tools + +- Python 3.12 or later +- pip package manager +- Docker CLI +- kubectl configured with cluster access +- Helm CLI + +### Required Permissions + +- Write access to container registry +- Cluster administrator privileges or appropriate RBAC permissions +- Access to OIDC provider configuration +- Access to Enterprise Inference Stack configuration repository + +--- + +## Implementation + +### Step 1: Create Your MCP Server Application + +Create `my_mcp_server.py` with the following implementation: + +```python +from fastmcp import FastMCP +from starlette.responses import JSONResponse + +# Initialize the MCP server +mcp = FastMCP("My MCP Server") + +@mcp.tool +def add_numbers(numbers: list[float]) -> str: + """ + Adds a list of numbers and returns the result as a string. + + Args: + numbers (list[float]): A list of numbers to be added. 
+ + Returns: + str: A message with the sum of the input numbers. + + Note: + This tool should be used for all addition requests, regardless of complexity. + """ + total = sum(numbers) + return f"The sum is {total}" + +@mcp.custom_route("/health", methods=["GET"]) +async def health_check(request): + return JSONResponse({"status": "healthy", "service": "My MCP Server"}) + +# Create ASGI application +app = mcp.http_app(path="/demo/mcp", stateless_http=True) +``` + +**Implementation Notes:** +- The `@mcp.tool` decorator exposes the `add_numbers` function as an MCP tool +- Custom health check endpoint enables Kubernetes liveness and readiness probes +- ASGI application is configured with the `/demo/mcp` base path for routing + +### Step 2: Define Python Dependencies + +Create `requirements.txt` with the following dependencies: + +``` +fastmcp<3 +uvicorn +``` + +--- + +## Containerization + +### Step 3: Create a Dockerfile + +```dockerfile +# Use a lightweight Python base image +FROM python:3.12-slim + +# Set up working directory +WORKDIR /app + +# Copy dependencies first for Docker build caching +COPY requirements.txt . + +# Install dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the MCP server code and any application related files +COPY my_mcp_server.py . 

# Expose the port used by the FastMCP server (8000)
EXPOSE 8000

# Run the server using uvicorn (ASGI)
# my_mcp_server:app → means "from my_mcp_server.py import app"
# host=0.0.0.0 so container is accessible externally
CMD ["uvicorn", "my_mcp_server:app", "--host", "0.0.0.0", "--port", "8000"]
```

**Build Configuration Details:**
- **Base Image**: `python:3.12-slim` for minimal footprint and security updates
- **Layer Optimization**: Dependencies layer placed before application code to leverage Docker build cache
- **Security**: Non-root execution recommended (implement in production environments)
- **Networking**: Host binding on `0.0.0.0` enables external accessibility within cluster

### Build Your Docker Image

Run the following command:

```bash
docker build -t my_mcp_server:latest .
```

**Options:**
- `-t my_mcp_server:latest`: Tags the image with repository and version
- Standard build context with Dockerfile in current directory

---

## Registry and Image Management

### Step 4: Push to Container Registry

Upload your image to your container registry:

```bash
docker tag my_mcp_server:latest <registry>/my_mcp_server:1.0.0
docker push <registry>/my_mcp_server:1.0.0
```

**Configuration Parameters:**
- `<registry>`: Replace with your container registry URL (e.g., `ecr.aws.com/my-org` or `gcr.io/my-project`)
- Image naming follows standard Docker registry conventions
- Ensure registry credentials are configured in your local Docker context

---

## Helm Chart Configuration

### Step 5: Update the Helm Chart Values

The MCP server helm chart is located in the [core/helm-charts/mcp-server-template/](../core/helm-charts/mcp-server-template/) directory.
+ +Update the `values.yaml` file in this directory with your deployment configuration: + +#### Update Container Image + +```yaml +# values.yaml +image: + repository: /my_mcp_server + tag: "1.0.0" # Use semantic versioning, never 'latest' in production + pullPolicy: Always # Always pull to ensure correct version with registry + # pullSecrets: [] +``` + +#### Service Configuration + +```yaml +service: + type: ClusterIP + port: 8000 + targetPort: 8000 + annotations: {} +``` + +#### Ingress Configuration + +```yaml +ingress: + enabled: true + className: "nginx" + namespace: auth-apisix + host: api.example.com # Replace with your ingress hostname + # MCP endpoint path - customize for your deployment + path: /demo/mcp + annotations: + nginx.ingress.kubernetes.io/use-regex: "true" + # Streaming-friendly settings for MCP Streamable HTTP (per MCP best practices) + nginx.ingress.kubernetes.io/proxy-buffering: "off" + nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" + nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" + nginx.ingress.kubernetes.io/proxy-http-version: "1.1" + tls: + enabled: true + secretName: mcp-server-tls # Replace with your TLS secret name +``` + +--- + +## OIDC Authentication Setup + +### Step 6: Configure OpenID Connect (OIDC) + +Configure OIDC to secure your MCP server with enterprise authentication: + +#### OIDC Configuration Parameters + +Update the OIDC settings in `values.yaml`: + +```yaml +oidc: + realm: master + client_id: "" # Update with value from generate-token.sh + client_secret: "" # Update with value from generate-token.sh + discovery: http://keycloak.default.svc.cluster.local/realms/master/.well-known/openid-configuration + introspection_endpoint: http://keycloak.default.svc.cluster.local/realms/master/protocol/openid-connect/token/introspect +``` + +### Step 7: Generate OIDC Credentials + +Execute the helper script to generate ClientID and Client Secret: + +```bash +source core/scripts/generate-token.sh +``` + 

**Prerequisites:**

Before running the script, update the following environment variables in `core/scripts/generate-token.sh` according to your cluster configuration:

```bash
export BASE_URL="api.example.com" # Base URL of Keycloak server (without https://)
export KEYCLOAK_ADMIN_USERNAME="your-keycloak-admin-user" # Keycloak admin username
export KEYCLOAK_PASSWORD="changeme" # Keycloak admin password
export KEYCLOAK_CLIENT_ID="my-client-id" # Client ID to be created in Keycloak
```

**Output:**

The script generates and displays the following values:

- `BASE_URL`: The base URL for your Keycloak server
- `KEYCLOAK_CLIENT_SECRET`: The confidential client secret for authentication
- `TOKEN`: Bearer token for API access (format: `bearer {TOKEN}`)

Copy these values and update the `values.yaml` file with the generated `client_id` and `client_secret`.


---

## Deployment

### Step 8: Deploy Using Helm

You can deploy your MCP server to the Enterprise Inference Stack by running the following command from the root folder of the repository:

```bash
helm install mcp-server ./core/helm-charts/mcp-server-template \
  --namespace default \
  --set image.repository=<registry>/my_mcp_server \
  --set image.tag=1.0.0 \
  --set oidc.client_id=<client-id> \
  --set oidc.client_secret=<client-secret> \
  --set ingress.host=<your-domain> \
  --set ingress.tls.secretName=<tls-secret-name>
```

### Step 9: Verify Deployment

```bash
# Check pod status
kubectl get pods -n default -l app.kubernetes.io/instance=mcp-server

# Verify service status
kubectl get svc -n default -l app.kubernetes.io/instance=mcp-server

# Check ingress configuration
kubectl get ingress -n auth-apisix -l app.kubernetes.io/instance=mcp-server

# View pod logs
kubectl logs -n default -l app.kubernetes.io/instance=mcp-server -f

# Test health endpoint
curl https://<your-domain>/health
```

### Step 10: Validate Functionality

Your MCP server is now accessible and ready to accept connections from MCP clients.

**Endpoint Details:**
- **URL**: `https://<your-domain>/demo/mcp`
- **Authentication**: OIDC Bearer Token (from Step 7 output)
- **Protocol**: Streamable HTTP

**Connecting from an MCP Client:**

You can connect your deployed MCP server using any MCP-compatible client. The client should be able to discover and invoke the tools and actions exposed by your MCP server.

**Example: Using Flowise AI**

To connect your MCP server from Flowise AI or similar MCP clients, provide the following configuration:

```json
{
  "url": "https://<your-domain>/demo/mcp",
  "headers": {
    "Authorization": "Bearer {{$vars.MCPAuthToken}}"
  }
}
```

Replace:
- `<your-domain>`: Your actual ingress hostname (e.g., `api.example.com`)
- `{{$vars.MCPAuthToken}}`: Your OIDC bearer token (obtained from the `TOKEN` output in Step 7)

Once connected, you should be able to list and invoke the available tools and actions exposed by your MCP server.


---

## Deploying Pre-Built MCP Server Images

The MCP server helm chart is located in the [core/helm-charts/mcp-server-template/](../core/helm-charts/mcp-server-template/) directory.

To deploy any pre-built MCP server from Docker Hub or other registries, each server has its own specific configuration requirements:

1. Find the image repository (e.g., `mcp/brave-search`)
2. Check the server documentation for required environment variables and any secrets specific to that server
3. Use the same Helm deployment pattern, but replace the `env` parameters with the server's specific requirements—each MCP server will have different secrets and environment variables that must be properly configured
4. Ensure all required secrets (API keys, tokens, credentials, etc.) are set correctly before deployment

**Note**: The Helm chart has been designed and optimized for the **streamable HTTP transport protocol**. 
This ensures compatibility with any pre-built MCP server that uses HTTP-based communication, providing proper streaming support and connection management out of the box. + +### Example: Brave Search MCP Server + +Here's how to deploy the **Brave Search MCP Server** as a practical example. + +**Docker Hub Repository**: https://hub.docker.com/r/mcp/brave-search + +#### Deployment Command +Replace the placeholders with your values (OIDC credentials from Step 7, API keys, ingress hostname, etc.). + +You can now deploy MCP server to the Enterprise Inference Stack by running the following command from the root folder of the repository: + +```bash +export BRAVE_API_KEY="Your BRAVE_API_KEY" +helm install mcp-server ./core/helm-charts/mcp-server-template -n default \ + --set image.repository=mcp/brave-search \ + --set image.tag=latest \ + --set env[0].name=BRAVE_API_KEY \ + --set env[0].value="$BRAVE_API_KEY" \ + --set env[1].name=BRAVE_MCP_TRANSPORT \ + --set env[1].value="http" \ + --set env[2].name=BRAVE_MCP_PORT \ + --set-string env[2].value="8000" \ + --set env[3].name=BRAVE_MCP_HOST \ + --set env[3].value="0.0.0.0" \ + --set env[4].name=BRAVE_MCP_ENABLED_TOOLS \ + --set env[4].value="brave_news_search" \ + --set oidc.client_id= \ + --set oidc.client_secret= \ + --set ingress.host= \ + --set ingress.tls.secretName= \ + --set ingress.path=/mcp +``` + +Tip: Put your custom values in a file (for example `values.override.yaml`) and deploy with: + +```bash +helm upgrade --install mcp-server mcp-server-template -n default -f values.override.yaml +``` + +Security: Do not store plaintext secrets in that file. Create Kubernetes `Secret` objects and reference them from `values.override.yaml`. Use `--set` or `--set-file` only for small, non-sensitive overrides when appropriate. 
+ +#### Verification + +```bash +# Check pod status +kubectl get pods -n default -l app.kubernetes.io/instance=mcp-server + +# View logs +kubectl logs -n default -l app.kubernetes.io/instance=mcp-server -f + +# Access your MCP endpoint +https:///mcp +``` + +--- + +## Troubleshooting + +### Common Issues and Resolution + +**Issue: ImagePullBackOff** +- **Cause**: Container image not accessible in registry +- **Resolution**: Verify image exists and registry credentials are configured in cluster + +**Issue: CrashLoopBackOff** +- **Cause**: Application startup failure +- **Resolution**: Check pod logs via `kubectl logs ` + +**Issue: 401 Unauthorized on Endpoints** +- **Cause**: OIDC token invalid or misconfigured +- **Resolution**: Verify OIDC configuration and token validity + +**Issue: Health Check Failures** +- **Cause**: Application not responding on `/health` endpoint +- **Resolution**: Verify application startup and port binding + +--- + +## References + +- [FastMCP HTTP Deployment Guide](https://gofastmcp.com/deployment/http) +- [FlowiseAI Tools & MCP](https://docs.flowiseai.com/tutorials/tools-and-mcp) +- [Docker Hub MCP Repositories](https://hub.docker.com/mcp/explore) + +--- diff --git a/docs/prerequisites.md b/docs/prerequisites.md index 48766a11..6480cfd8 100644 --- a/docs/prerequisites.md +++ b/docs/prerequisites.md @@ -15,7 +15,7 @@ The first step is to get access to the hardware platforms. This guide assumes th | Category | Details | |---------------------|-------------------------------------------------------------------------------------------------------------------| -| Operating System | Ubuntu 22.04 | +| Operating System | Ubuntu 22.04, Ubuntu 24.04 | | Hardware Platforms | 4th Gen Intel® Xeon® Scalable processors
5th Gen Intel® Xeon® Scalable processors
6th Gen Intel® Xeon® Scalable processors
3rd Gen Intel® Xeon® Scalable processors and Intel® Gaudi® 2 AI Accelerator
4th Gen Intel® Xeon® Scalable processors and Intel® Gaudi® 2 AI Accelerator
6th Gen Intel® Xeon® Scalable processors and Intel® Gaudi® 3 AI Accelerator| | Gaudi Firmware Version | 1.20.0 or newer diff --git a/docs/single-node-deployment.md b/docs/single-node-deployment.md index 5d1aaf91..65e0bd76 100644 --- a/docs/single-node-deployment.md +++ b/docs/single-node-deployment.md @@ -8,6 +8,12 @@ Before running the automation, it is recommended to complete all [prerequisites] 2. [SSL/TLS Certificate Setup for Development Environment](./prerequisites.md#development-environment) 3. [Hugging Face Token Generation](./prerequisites.md#hugging-face-token-generation) +## System Component Deployment Recommendations + +For single-node Xeon clusters, **Keycloak** and **APISIX** are recommended. + +For Gaudi or large multi-node Xeon clusters, the GenAI Gateway is well-suited. + ## Deployment ### Step 1: Configure the Automation config file diff --git a/plugins/agenticai/docs/agenticai-quickstart.md b/plugins/agenticai/docs/agenticai-quickstart.md new file mode 100644 index 00000000..8fa7b541 --- /dev/null +++ b/plugins/agenticai/docs/agenticai-quickstart.md @@ -0,0 +1,272 @@ +# Agentic AI Plugin - Quick Start Guide + +## Overview + +The **Agentic AI Plugin** provides a visual platform for building AI agents, multi-agent systems, and intelligent workflows. The current implementation uses **Flowise**, an open-source drag-and-drop tool for creating conversational AI, RAG applications, and workflow automation without coding. 
+ +**About Flowise:** [Official Documentation](https://docs.flowiseai.com/) | [GitHub](https://github.com/FlowiseAI/Flowise) + +**Key Features:** +- Visual workflow builder with drag-and-drop interface +- Pre-built agent templates and marketplace +- Multi-agent collaboration support +- Integration with deployed LLM models +- RAG (Retrieval Augmented Generation) support +- API integration for external services + +--- + +## Deployment + +### Prerequisites +- Intel® AI for Enterprise Inference automation deployed +- Kubernetes cluster with ingress controller +- TLS certificate with Flowise subdomain in SANs (flowise-) + +### Step 1: Enable Plugin + +Edit the main configuration: +```bash +vim core/inventory/inference-config.cfg +``` + +Set: +```properties +deploy_agenticai_plugin=on +``` + +### Step 2: Deploy + +```bash +cd core +bash inference-stack-deploy.sh +``` + +Select: `1) Provision Enterprise Inference Cluster` + +### Step 3: Verify + +```bash +kubectl get pods -n flowise +``` + +Expected output: +``` +NAME READY STATUS RESTARTS AGE +flowise-xxxxx 1/1 Running 0 5m +flowise-postgresql-0 1/1 Running 0 5m +flowise-redis-master-0 1/1 Running 0 5m +flowise-worker-xxxxx 1/1 Running 0 5m +``` + +--- + +## Initial Setup + +### Accessing the Platform + +Open in browser: +``` +https://flowise- +``` + +> **Note:** The subdomain is "flowise" as this is the current implementation. Future versions may support custom subdomains. + +### First Time Setup (Account Creation) + +When you first access the platform, you'll see the **Setup Account** page: + +1. **Administrator Name:** Your display name (e.g., "John Doe") +2. **Administrator Email:** Valid email address - **this becomes your login ID** +3. **Password:** Must contain: + - At least 8 characters + - One lowercase letter + - One uppercase letter + - One digit + - One special character +4. **Confirm Password:** Re-enter password +5. Click **"Sign Up"** + +> **Important:** Account setup is local to your server. 
No external connections are made. Your data stays on your infrastructure. + +### Subsequent Logins + +After account creation, use: +- **Email:** The email you registered +- **Password:** Your chosen password + +--- + +## Using the Platform + +### Add a Credential + +Flowise stores API keys and credentials that can be reused by workflow nodes. Credentials are encrypted in the database. + +1. In the left sidebar, click **Credentials** +2. Click **Add Credential** +3. Choose **OpenAI API** +4. Provide: + - **Credential Name**: e.g., `InternalLLM` + - **API Key**: you can enter `sk-dummy` (for internal models) +5. Click **Save** + +⚠️ This UI uses OpenAI API credential type because Flowise nodes expect this format; for internal models there may not be a real API key. + +### Connecting to Deployed Models + +The Agentic AI Plugin is designed to work seamlessly with models deployed on the same Kubernetes cluster, avoiding external network calls for better performance and security. + +#### Using Locally Deployed Models + +**For models deployed on the same cluster:** + +Since your LLM models are deployed within the same Kubernetes cluster as Flowise, use internal service endpoints for optimal performance: + +1. Add **"Custom Chat Model"** or **"OpenAI Compatible"** node to your workflow +2. 
Configure with Kubernetes internal service endpoint:
   - **Base URL/Endpoint:** `http://<service-name>.<namespace>.svc.cluster.local:<port>/v1`
     - Example: `http://llama-2-7b-service.default.svc.cluster.local:8000/v1`
   - **Model Name/ID:** Your model identifier
     - Example: `meta-llama/Llama-2-7b-chat-hf`
   - **API Key:** `sk-dummy` (use APIKey from keycloak or GenAI gateway)

**Find your deployed model services:**
```bash
kubectl get svc | grep -E "vllm"
```

**Benefits of using internal endpoints:**
- ✅ **Faster:** No network egress/ingress - direct cluster networking
- ✅ **Secure:** Traffic stays within the cluster
- ✅ **No External Costs:** No internet bandwidth charges
- ✅ **Lower Latency:** Milliseconds vs. seconds

#### Using External/Cloud Models (Optional)

**For models hosted externally or in the cloud:**

If you need to use OpenAI, Anthropic, or other external models:

1. Add the appropriate chat model node (e.g., "ChatOpenAI", "ChatAnthropic")
2. Configure:
   - **Endpoint:** Cloud provider endpoint (e.g., `https://api.openai.com/v1`)
   - **Model ID:** Cloud model name (e.g., `gpt-4`, `claude-3-opus`)
   - **API Key:** Your cloud provider API key

#### Template Configuration

The included `software-team.json` template uses placeholder values that you should replace:
- **`your-model-endpoint`** → Replace with actual service endpoint
- **`your-model-id`** → Replace with actual model name/ID

After importing the template, update all LLM nodes with your model configuration.


### How to Load a Sample AgentFlow Template

1. From the **left sidebar**, click **AgentFlows**
2. Click **Add New**
3. An empty Agent editor will open
   (You will see a blank canvas with the title “Untitled Agent”)
4. In the **top-right corner**, click the **Settings** gear icon and select **Load Agents**
5. Select the provided agent template `.json` file.
   Pre-built agentflow template available at:
   ```
   plugins/agenticai/templates/software-team.json
   ```
6. The agent configuration will load automatically
7. In all the LLM nodes, choose the credential that was created in the step above, and also update the model and base path.
8. Click the **Save (💾) icon**
9. Enter a name for the agent and save


---

## Administration

### Common Commands

**View Logs:**
```bash
kubectl logs -n flowise -l app.kubernetes.io/name=flowise -f
kubectl logs -n flowise -l app=flowise-worker -f
```

**Check Status:**
```bash
kubectl get pods,svc,ingress -n flowise
```

**Database Backup:**
```bash
kubectl exec -n flowise flowise-postgresql-0 -- pg_dump -U flowise flowise > flowise-backup.sql
```

**Restart Platform:**
```bash
kubectl rollout restart deployment/flowise -n flowise
```

**Scale Workers:**
```bash
kubectl scale deployment flowise-worker -n flowise --replicas=5
```

### Database Passwords

Backend passwords (PostgreSQL, Redis) are auto-generated during deployment and stored in:
```
core/kubespray/config/vault.yml
```

Variables:
- `agenticai_postgres_password`
- `agenticai_redis_password`

> **Note:** User login passwords are set by users themselves during account creation, not from vault.
+ +--- + +## Troubleshooting + +### Cannot Access UI + +**Check ingress:** +```bash +kubectl get ingress -n flowise +kubectl describe ingress flowise -n flowise +``` + +**Verify certificate includes subdomain:** +```bash +openssl s_client -connect flowise-:443 -servername flowise- < /dev/null | openssl x509 -noout -text | grep DNS +``` + +### Pods Not Starting + +```bash +kubectl describe pod -n flowise +kubectl logs -n flowise --previous +``` + +### Database Connection Issues + +```bash +# Test connectivity from Flowise pod +kubectl exec -n flowise -- nc -zv flowise-postgresql 5432 + +# Check PostgreSQL logs +kubectl logs -n flowise flowise-postgresql-0 +``` + +--- + +## Additional Resources + +- Official Documentation: https://docs.flowiseai.com/ +- GitHub Repository: https://github.com/FlowiseAI/Flowise +- Community Discord: https://discord.gg/jbaHfsRVBW +- Example Workflows: https://docs.flowiseai.com/use-cases + +--- diff --git a/plugins/agenticai/playbooks/deploy-agenticai-plugin.yml b/plugins/agenticai/playbooks/deploy-agenticai-plugin.yml new file mode 100644 index 00000000..fd3d8d26 --- /dev/null +++ b/plugins/agenticai/playbooks/deploy-agenticai-plugin.yml @@ -0,0 +1,222 @@ +# Copyright (C) 2024-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +--- +- name: Deploy Agentic AI Plugin + hosts: "{{ inference_delegate | default('kube_control_plane') }}" + gather_facts: false + any_errors_fatal: "{{ any_errors_fatal | default(true) }}" + environment: "{{ proxy_disable_env | default(env_proxy | default({})) }}" + + vars_files: + - "{{ lookup('env', 'PWD') }}/config/vars/inference_common.yml" + - "{{ lookup('env', 'PWD') }}/config/vars/inference_delegate.yml" + - "{{ lookup('env', 'PWD') }}/../../plugins/agenticai/vars/agenticai-plugin-vars.yml" + - "{{ lookup('env', 'PWD') }}/config/vault.yml" + + roles: + - role: inference-tools + + tasks: + ########################################################################### + # Info + 
########################################################################### + - name: Display Flowise Deployment Configuration + debug: + msg: + - "==============================================" + - "Deploying Flowise Agentic Plugin" + - "Namespace: {{ agenticai_namespace }}" + - "Domain: flowise-{{ cluster_url }}" + - "Ingress Class: {{ agenticai_ingress_class }}" + - "Worker Enabled: {{ agenticai_worker_enabled }}" + - "==============================================" + run_once: true + + ########################################################################### + # Namespace + ########################################################################### + - name: Ensure Flowise Namespace Exists + kubernetes.core.k8s: + state: present + definition: + apiVersion: v1 + kind: Namespace + metadata: + name: "{{ agenticai_namespace }}" + run_once: true + + ########################################################################### + # TLS Secret + ########################################################################### + - name: Create TLS Secret for Flowise + kubernetes.core.k8s: + state: present + definition: + apiVersion: v1 + kind: Secret + type: kubernetes.io/tls + metadata: + name: "flowise-{{ cluster_url }}" + namespace: "{{ agenticai_namespace }}" + data: + tls.crt: "{{ lookup('file', cert_file) | b64encode }}" + tls.key: "{{ lookup('file', key_file) | b64encode }}" + when: agenticai_tls_enabled | bool + run_once: true + + ########################################################################### + # Helm Repo + ########################################################################### + - name: Add Flowise Helm Repository + command: > + helm repo add {{ agenticai_helm_repo_name }} {{ agenticai_helm_repo_url }} + register: helm_repo_add + changed_when: "'already exists' not in helm_repo_add.stderr" + failed_when: helm_repo_add.rc != 0 and 'already exists' not in helm_repo_add.stderr + run_once: true + + - name: Update Helm Repositories + command: helm repo 
update + run_once: true + + ########################################################################### + # Validate Required Secrets + ########################################################################### + - name: Validate PostgreSQL Password is Set + fail: + msg: | + ERROR: PostgreSQL password is not set! + Please ensure 'agenticai_postgres_password' is defined in vault.yml. + This is required for production security. + when: agenticai_postgres_password is not defined or agenticai_postgres_password | length == 0 + run_once: true + + - name: Validate Redis Password is Set + fail: + msg: | + ERROR: Redis password is not set! + Please ensure 'agenticai_redis_password' is defined in vault.yml. + This is required for production security. + when: agenticai_redis_password is not defined or agenticai_redis_password | length == 0 + run_once: true + + ########################################################################### + # Deploy Flowise (NO ingress via Helm) + ########################################################################### + - name: Install or Upgrade Flowise via Helm + shell: | + helm upgrade --install flowise {{ agenticai_helm_repo_name }}/{{ agenticai_helm_chart_name }} \ + --namespace {{ agenticai_namespace }} \ + --create-namespace \ + --set image.repository={{ agenticai_image_repository }} \ + --set image.tag={{ agenticai_image_tag }} \ + --set replicaCount={{ agenticai_replica_count }} \ + --set env.DATABASE_TYPE={{ agenticai_database_type }} \ + --set env.DATABASE_HOST=flowise-postgresql.{{ agenticai_namespace }}.svc.cluster.local \ + --set env.DATABASE_PORT={{ agenticai_postgres_port }} \ + --set env.DATABASE_NAME={{ agenticai_postgres_database }} \ + --set env.DATABASE_USER={{ agenticai_postgres_username }} \ + --set env.DATABASE_PASSWORD={{ agenticai_postgres_password }} \ + --set env.REDIS_URL=redis://:{{ agenticai_redis_password }}@flowise-redis-master.{{ agenticai_namespace }}.svc.cluster.local:{{ agenticai_redis_port }} \ + --set 
postgresql.enabled={{ agenticai_postgres_enabled }} \ + --set postgresql.auth.username={{ agenticai_postgres_username }} \ + --set postgresql.auth.password={{ agenticai_postgres_password }} \ + --set postgresql.auth.database={{ agenticai_postgres_database }} \ + --set postgresql.primary.persistence.size={{ agenticai_postgres_storage_size }} \ + --set redis.enabled={{ agenticai_redis_enabled }} \ + --set redis.auth.password={{ agenticai_redis_password }} \ + --set redis.master.persistence.size={{ agenticai_redis_storage_size }} \ + --set worker.enabled={{ agenticai_worker_enabled }} \ + --set worker.replicaCount={{ agenticai_worker_replica_count }} \ + --set worker.resources.requests.cpu={{ agenticai_worker_cpu_request }} \ + --set worker.resources.requests.memory={{ agenticai_worker_memory_request }} \ + --set worker.resources.limits.cpu={{ agenticai_worker_cpu_limit }} \ + --set worker.resources.limits.memory={{ agenticai_worker_memory_limit }} \ + --set resources.requests.cpu={{ agenticai_cpu_request }} \ + --set resources.requests.memory={{ agenticai_memory_request }} \ + --set resources.limits.cpu={{ agenticai_cpu_limit }} \ + --set resources.limits.memory={{ agenticai_memory_limit }} \ + --set persistence.enabled={{ agenticai_persistence_enabled }} \ + --set persistence.storageClass={{ agenticai_persistence_storage_class }} \ + --set persistence.size={{ agenticai_persistence_size }} \ + --set ingress.enabled=false \ + --wait \ + --timeout 10m + run_once: true + + ########################################################################### + # Root Ingress (SUBDOMAIN MODE) + ########################################################################### + - name: Create Flowise Root Ingress (Subdomain) + kubernetes.core.k8s: + state: present + definition: + apiVersion: networking.k8s.io/v1 + kind: Ingress + metadata: + name: flowise-root + namespace: "{{ agenticai_namespace }}" + spec: + ingressClassName: "{{ agenticai_ingress_class }}" + tls: + - secretName: 
"flowise-{{ cluster_url }}" + hosts: + - "flowise-{{ cluster_url }}" + rules: + - host: "flowise-{{ cluster_url }}" + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: flowise + port: + number: 3000 + when: kubernetes_platform != 'openshift' and agenticai_ingress_enabled | bool + run_once: true + + ########################################################################### + # Wait for Pods + ########################################################################### + - name: Display Wait Message + debug: + msg: + - "==============================================" + - "Waiting for Flowise pods to be ready..." + - "This may take several minutes as PostgreSQL" + - "and Redis initialize for the first time." + - "==============================================" + run_once: true + + - name: Wait for Flowise Pods to be Ready + shell: | + kubectl get pods -n {{ agenticai_namespace }} \ + -l app.kubernetes.io/name=flowise \ + -o json | jq -r ' + .items[] | + select(.status.phase != "Running" + or (.status.containerStatuses[]? | select(.ready != true))) | + .metadata.name' | wc -l + register: pod_status + until: pod_status.stdout == "0" + retries: 60 + delay: 10 + run_once: true + + ########################################################################### + # Summary + ########################################################################### + - name: Display Deployment Summary + debug: + msg: + - "==============================================" + - "Flowise Agentic Plugin Deployed Successfully!" 
+ - "==============================================" + - "Namespace: {{ agenticai_namespace }}" + - "Access URL: https://flowise-{{ cluster_url }}" + - "Database: PostgreSQL" + - "Cache: Redis" + - "Worker Mode: {{ agenticai_worker_enabled }}" + - "==============================================" + run_once: true diff --git a/plugins/agenticai/templates/software-team.json b/plugins/agenticai/templates/software-team.json new file mode 100644 index 00000000..415a71cd --- /dev/null +++ b/plugins/agenticai/templates/software-team.json @@ -0,0 +1,2081 @@ +{ + "nodes": [ + { + "id": "startAgentflow_0", + "type": "agentFlow", + "position": { + "x": 100.79370050359839, + "y": 100 + }, + "data": { + "id": "startAgentflow_0", + "label": "Start", + "version": 1.1, + "name": "startAgentflow", + "type": "Start", + "color": "#7EE787", + "hideInput": true, + "baseClasses": [ + "Start" + ], + "category": "Agent Flows", + "description": "Starting point of the agentflow", + "inputParams": [ + { + "label": "Input Type", + "name": "startInputType", + "type": "options", + "options": [ + { + "label": "Chat Input", + "name": "chatInput", + "description": "Start the conversation with chat input" + }, + { + "label": "Form Input", + "name": "formInput", + "description": "Start the workflow with form inputs" + } + ], + "default": "chatInput", + "id": "startAgentflow_0-input-startInputType-options", + "display": true + }, + { + "label": "Form Title", + "name": "formTitle", + "type": "string", + "placeholder": "Please Fill Out The Form", + "show": { + "startInputType": "formInput" + }, + "id": "startAgentflow_0-input-formTitle-string", + "display": false + }, + { + "label": "Form Description", + "name": "formDescription", + "type": "string", + "placeholder": "Complete all fields below to continue", + "show": { + "startInputType": "formInput" + }, + "id": "startAgentflow_0-input-formDescription-string", + "display": false + }, + { + "label": "Form Input Types", + "name": "formInputTypes", + 
"description": "Specify the type of form input", + "type": "array", + "show": { + "startInputType": "formInput" + }, + "array": [ + { + "label": "Type", + "name": "type", + "type": "options", + "options": [ + { + "label": "String", + "name": "string" + }, + { + "label": "Number", + "name": "number" + }, + { + "label": "Boolean", + "name": "boolean" + }, + { + "label": "Options", + "name": "options" + } + ], + "default": "string" + }, + { + "label": "Label", + "name": "label", + "type": "string", + "placeholder": "Label for the input" + }, + { + "label": "Variable Name", + "name": "name", + "type": "string", + "placeholder": "Variable name for the input (must be camel case)", + "description": "Variable name must be camel case. For example: firstName, lastName, etc." + }, + { + "label": "Add Options", + "name": "addOptions", + "type": "array", + "show": { + "formInputTypes[$index].type": "options" + }, + "array": [ + { + "label": "Option", + "name": "option", + "type": "string" + } + ] + } + ], + "id": "startAgentflow_0-input-formInputTypes-array", + "display": false + }, + { + "label": "Ephemeral Memory", + "name": "startEphemeralMemory", + "type": "boolean", + "description": "Start fresh for every execution without past chat history", + "optional": true, + "id": "startAgentflow_0-input-startEphemeralMemory-boolean", + "display": true + }, + { + "label": "Flow State", + "name": "startState", + "description": "Runtime state during the execution of the workflow", + "type": "array", + "optional": true, + "array": [ + { + "label": "Key", + "name": "key", + "type": "string", + "placeholder": "Foo" + }, + { + "label": "Value", + "name": "value", + "type": "string", + "placeholder": "Bar", + "optional": true + } + ], + "id": "startAgentflow_0-input-startState-array", + "display": true + }, + { + "label": "Persist State", + "name": "startPersistState", + "type": "boolean", + "description": "Persist the state in the same session", + "optional": true, + "id": 
"startAgentflow_0-input-startPersistState-boolean", + "display": true + } + ], + "inputAnchors": [], + "inputs": { + "startInputType": "chatInput", + "formTitle": "", + "formDescription": "", + "formInputTypes": "", + "startEphemeralMemory": "", + "startState": [ + { + "key": "next", + "value": "" + }, + { + "key": "instructions", + "value": "" + } + ], + "startPersistState": "" + }, + "outputAnchors": [ + { + "id": "startAgentflow_0-output-startAgentflow", + "label": "Start", + "name": "startAgentflow" + } + ], + "outputs": {}, + "selected": false + }, + "width": 103, + "height": 66, + "positionAbsolute": { + "x": 100.79370050359839, + "y": 100 + }, + "selected": false, + "dragging": false + }, + { + "id": "llmAgentflow_0", + "position": { + "x": 272.17480201439355, + "y": 45.45629949640161 + }, + "data": { + "id": "llmAgentflow_0", + "label": "Supervisor", + "version": 1, + "name": "llmAgentflow", + "type": "LLM", + "color": "#64B5F6", + "baseClasses": [ + "LLM" + ], + "category": "Agent Flows", + "description": "Large language models to analyze user-provided inputs and generate responses", + "inputParams": [ + { + "label": "Model", + "name": "llmModel", + "type": "asyncOptions", + "loadMethod": "listModels", + "loadConfig": true, + "id": "llmAgentflow_0-input-llmModel-asyncOptions", + "display": true + }, + { + "label": "Messages", + "name": "llmMessages", + "type": "array", + "optional": true, + "acceptVariable": true, + "array": [ + { + "label": "Role", + "name": "role", + "type": "options", + "options": [ + { + "label": "System", + "name": "system" + }, + { + "label": "Assistant", + "name": "assistant" + }, + { + "label": "Developer", + "name": "developer" + }, + { + "label": "User", + "name": "user" + } + ] + }, + { + "label": "Content", + "name": "content", + "type": "string", + "acceptVariable": true, + "generateInstruction": true, + "rows": 4 + } + ], + "id": "llmAgentflow_0-input-llmMessages-array", + "display": true + }, + { + "label": "Enable Memory", 
+ "name": "llmEnableMemory", + "type": "boolean", + "description": "Enable memory for the conversation thread", + "default": true, + "optional": true, + "id": "llmAgentflow_0-input-llmEnableMemory-boolean", + "display": true + }, + { + "label": "Memory Type", + "name": "llmMemoryType", + "type": "options", + "options": [ + { + "label": "All Messages", + "name": "allMessages", + "description": "Retrieve all messages from the conversation" + }, + { + "label": "Window Size", + "name": "windowSize", + "description": "Uses a fixed window size to surface the last N messages" + }, + { + "label": "Conversation Summary", + "name": "conversationSummary", + "description": "Summarizes the whole conversation" + }, + { + "label": "Conversation Summary Buffer", + "name": "conversationSummaryBuffer", + "description": "Summarize conversations once token limit is reached. Default to 2000" + } + ], + "optional": true, + "default": "allMessages", + "show": { + "llmEnableMemory": true + }, + "id": "llmAgentflow_0-input-llmMemoryType-options", + "display": true + }, + { + "label": "Window Size", + "name": "llmMemoryWindowSize", + "type": "number", + "default": "20", + "description": "Uses a fixed window size to surface the last N messages", + "show": { + "llmMemoryType": "windowSize" + }, + "id": "llmAgentflow_0-input-llmMemoryWindowSize-number", + "display": false + }, + { + "label": "Max Token Limit", + "name": "llmMemoryMaxTokenLimit", + "type": "number", + "default": "2000", + "description": "Summarize conversations once token limit is reached. 
Default to 2000", + "show": { + "llmMemoryType": "conversationSummaryBuffer" + }, + "id": "llmAgentflow_0-input-llmMemoryMaxTokenLimit-number", + "display": false + }, + { + "label": "Input Message", + "name": "llmUserMessage", + "type": "string", + "description": "Add an input message as user message at the end of the conversation", + "rows": 4, + "optional": true, + "acceptVariable": true, + "show": { + "llmEnableMemory": true + }, + "id": "llmAgentflow_0-input-llmUserMessage-string", + "display": true + }, + { + "label": "Return Response As", + "name": "llmReturnResponseAs", + "type": "options", + "options": [ + { + "label": "User Message", + "name": "userMessage" + }, + { + "label": "Assistant Message", + "name": "assistantMessage" + } + ], + "default": "userMessage", + "id": "llmAgentflow_0-input-llmReturnResponseAs-options", + "display": true + }, + { + "label": "JSON Structured Output", + "name": "llmStructuredOutput", + "description": "Instruct the LLM to give output in a JSON structured schema", + "type": "array", + "optional": true, + "acceptVariable": true, + "array": [ + { + "label": "Key", + "name": "key", + "type": "string" + }, + { + "label": "Type", + "name": "type", + "type": "options", + "options": [ + { + "label": "String", + "name": "string" + }, + { + "label": "String Array", + "name": "stringArray" + }, + { + "label": "Number", + "name": "number" + }, + { + "label": "Boolean", + "name": "boolean" + }, + { + "label": "Enum", + "name": "enum" + }, + { + "label": "JSON Array", + "name": "jsonArray" + } + ] + }, + { + "label": "Enum Values", + "name": "enumValues", + "type": "string", + "placeholder": "value1, value2, value3", + "description": "Enum values. 
Separated by comma", + "optional": true, + "show": { + "llmStructuredOutput[$index].type": "enum" + } + }, + { + "label": "JSON Schema", + "name": "jsonSchema", + "type": "code", + "placeholder": "{\n \"answer\": {\n \"type\": \"string\",\n \"description\": \"Value of the answer\"\n },\n \"reason\": {\n \"type\": \"string\",\n \"description\": \"Reason for the answer\"\n },\n \"optional\": {\n \"type\": \"boolean\"\n },\n \"count\": {\n \"type\": \"number\"\n },\n \"children\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"value\": {\n \"type\": \"string\",\n \"description\": \"Value of the children's answer\"\n }\n }\n }\n }\n}", + "description": "JSON schema for the structured output", + "optional": true, + "show": { + "llmStructuredOutput[$index].type": "jsonArray" + } + }, + { + "label": "Description", + "name": "description", + "type": "string", + "placeholder": "Description of the key" + } + ], + "id": "llmAgentflow_0-input-llmStructuredOutput-array", + "display": true + }, + { + "label": "Update Flow State", + "name": "llmUpdateState", + "description": "Update runtime state during the execution of the workflow", + "type": "array", + "optional": true, + "acceptVariable": true, + "array": [ + { + "label": "Key", + "name": "key", + "type": "asyncOptions", + "loadMethod": "listRuntimeStateKeys", + "freeSolo": true + }, + { + "label": "Value", + "name": "value", + "type": "string", + "acceptVariable": true, + "acceptNodeOutputAsVariable": true + } + ], + "id": "llmAgentflow_0-input-llmUpdateState-array", + "display": true + } + ], + "inputAnchors": [], + "inputs": { + "llmModel": "chatOpenAICustom", + "llmMessages": [ + { + "role": "system", + "content": "

You are a supervisor tasked with managing a conversation between the following workers:

  • Software Engineer

  • Software Reviewer

Given the following user request, respond with the worker to act next.
Each worker will perform a task and respond with their results and status.
When finished, respond with FINISH.
Select strategically to minimize the number of steps taken.

" + } + ], + "llmEnableMemory": true, + "llmMemoryType": "allMessages", + "llmUserMessage": "

", + "llmReturnResponseAs": "userMessage", + "llmStructuredOutput": [ + { + "key": "next", + "type": "enum", + "enumValues": "SOFTWARE, REVIEWER, FINISH", + "jsonSchema": "", + "description": "The next worker to act" + }, + { + "key": "instructions", + "type": "string", + "enumValues": "", + "jsonSchema": "", + "description": "The specific instructions of the sub-tasks the next worker should accomplish." + } + ], + "llmUpdateState": [ + { + "key": "next", + "value": "

{{ output.next }}

" + }, + { + "key": "instructions", + "value": "

{{ output.instructions }}

" + } + ], + "llmModelConfig": { + "cache": "", + "modelName": "your-model-id", + "temperature": 0.9, + "streaming": true, + "maxTokens": "", + "topP": "", + "frequencyPenalty": "", + "presencePenalty": "", + "timeout": "", + "basepath": "your-model-endpoint", + "baseOptions": "", + "llmModel": "chatOpenAICustom" + } + }, + "outputAnchors": [ + { + "id": "llmAgentflow_0-output-llmAgentflow", + "label": "LLM", + "name": "llmAgentflow" + } + ], + "outputs": {}, + "selected": false + }, + "type": "agentFlow", + "width": 293, + "height": 72, + "selected": false, + "dragging": false, + "positionAbsolute": { + "x": 272.17480201439355, + "y": 45.45629949640161 + } + }, + { + "id": "stickyNoteAgentflow_0", + "position": { + "x": 320.5771778416711, + "y": -43.40187172705003 + }, + "data": { + "id": "stickyNoteAgentflow_0", + "label": "Sticky Note", + "version": 1, + "name": "stickyNoteAgentflow", + "type": "StickyNote", + "color": "#fee440", + "baseClasses": [ + "StickyNote" + ], + "category": "Agent Flows", + "description": "Add notes to the agent flow", + "inputParams": [ + { + "label": "", + "name": "note", + "type": "string", + "rows": 1, + "placeholder": "Type something here", + "optional": true, + "id": "stickyNoteAgentflow_0-input-note-string", + "display": true + } + ], + "inputAnchors": [], + "inputs": { + "note": "Decide whom to call next" + }, + "outputAnchors": [ + { + "id": "stickyNoteAgentflow_0-output-stickyNoteAgentflow", + "label": "Sticky Note", + "name": "stickyNoteAgentflow" + } + ], + "outputs": {}, + "selected": false + }, + "type": "stickyNote", + "width": 210, + "height": 82, + "selected": false, + "positionAbsolute": { + "x": 320.5771778416711, + "y": -43.40187172705003 + }, + "dragging": false + }, + { + "id": "conditionAgentflow_0", + "position": { + "x": 632.6376351781316, + "y": 44.06337565982208 + }, + "data": { + "id": "conditionAgentflow_0", + "label": "Condition 0", + "version": 1, + "name": "conditionAgentflow", + "type": "Condition", + 
"color": "#FFB938", + "baseClasses": [ + "Condition" + ], + "category": "Agent Flows", + "description": "Split flows based on If Else conditions", + "inputParams": [ + { + "label": "Conditions", + "name": "conditions", + "type": "array", + "description": "Values to compare", + "acceptVariable": true, + "default": [ + { + "type": "string", + "value1": "

{{ $flow.state.next }}

", + "operation": "equal", + "value2": "

SOFTWARE

" + } + ], + "array": [ + { + "label": "Type", + "name": "type", + "type": "options", + "options": [ + { + "label": "String", + "name": "string" + }, + { + "label": "Number", + "name": "number" + }, + { + "label": "Boolean", + "name": "boolean" + } + ], + "default": "string" + }, + { + "label": "Value 1", + "name": "value1", + "type": "string", + "default": "", + "description": "First value to be compared with", + "acceptVariable": true, + "show": { + "conditions[$index].type": "string" + } + }, + { + "label": "Operation", + "name": "operation", + "type": "options", + "options": [ + { + "label": "Contains", + "name": "contains" + }, + { + "label": "Ends With", + "name": "endsWith" + }, + { + "label": "Equal", + "name": "equal" + }, + { + "label": "Not Contains", + "name": "notContains" + }, + { + "label": "Not Equal", + "name": "notEqual" + }, + { + "label": "Regex", + "name": "regex" + }, + { + "label": "Starts With", + "name": "startsWith" + }, + { + "label": "Is Empty", + "name": "isEmpty" + }, + { + "label": "Not Empty", + "name": "notEmpty" + } + ], + "default": "equal", + "description": "Type of operation", + "show": { + "conditions[$index].type": "string" + } + }, + { + "label": "Value 2", + "name": "value2", + "type": "string", + "default": "", + "description": "Second value to be compared with", + "acceptVariable": true, + "show": { + "conditions[$index].type": "string" + }, + "hide": { + "conditions[$index].operation": [ + "isEmpty", + "notEmpty" + ] + } + }, + { + "label": "Value 1", + "name": "value1", + "type": "number", + "default": "", + "description": "First value to be compared with", + "acceptVariable": true, + "show": { + "conditions[$index].type": "number" + } + }, + { + "label": "Operation", + "name": "operation", + "type": "options", + "options": [ + { + "label": "Smaller", + "name": "smaller" + }, + { + "label": "Smaller Equal", + "name": "smallerEqual" + }, + { + "label": "Equal", + "name": "equal" + }, + { + "label": "Not Equal", + "name": 
"notEqual" + }, + { + "label": "Larger", + "name": "larger" + }, + { + "label": "Larger Equal", + "name": "largerEqual" + }, + { + "label": "Is Empty", + "name": "isEmpty" + }, + { + "label": "Not Empty", + "name": "notEmpty" + } + ], + "default": "equal", + "description": "Type of operation", + "show": { + "conditions[$index].type": "number" + } + }, + { + "label": "Value 2", + "name": "value2", + "type": "number", + "default": 0, + "description": "Second value to be compared with", + "acceptVariable": true, + "show": { + "conditions[$index].type": "number" + } + }, + { + "label": "Value 1", + "name": "value1", + "type": "boolean", + "default": false, + "description": "First value to be compared with", + "show": { + "conditions[$index].type": "boolean" + } + }, + { + "label": "Operation", + "name": "operation", + "type": "options", + "options": [ + { + "label": "Equal", + "name": "equal" + }, + { + "label": "Not Equal", + "name": "notEqual" + } + ], + "default": "equal", + "description": "Type of operation", + "show": { + "conditions[$index].type": "boolean" + } + }, + { + "label": "Value 2", + "name": "value2", + "type": "boolean", + "default": false, + "description": "Second value to be compared with", + "show": { + "conditions[$index].type": "boolean" + } + } + ], + "id": "conditionAgentflow_0-input-conditions-array", + "display": true + } + ], + "inputAnchors": [], + "inputs": { + "conditions": [ + { + "type": "string", + "value1": "

{{ $flow.state.next }}

", + "operation": "equal", + "value2": "

SOFTWARE

" + }, + { + "type": "string", + "value1": "

{{ $flow.state.next }}

", + "operation": "equal", + "value2": "

REVIEWER

" + } + ] + }, + "outputAnchors": [ + { + "id": "conditionAgentflow_0-output-0", + "label": 0, + "name": 0, + "description": "Condition 0" + }, + { + "id": "conditionAgentflow_0-output-1", + "label": 1, + "name": 1, + "description": "Condition 1" + }, + { + "id": "conditionAgentflow_0-output-2", + "label": 2, + "name": 2, + "description": "Else" + } + ], + "outputs": { + "conditionAgentflow": "" + }, + "selected": false + }, + "type": "agentFlow", + "width": 147, + "height": 100, + "positionAbsolute": { + "x": 632.6376351781316, + "y": 44.06337565982208 + }, + "selected": false, + "dragging": false + }, + { + "id": "llmAgentflow_1", + "position": { + "x": 874.4752250712392, + "y": -110.49146731274378 + }, + "data": { + "id": "llmAgentflow_1", + "label": "Software Engineer", + "version": 1, + "name": "llmAgentflow", + "type": "LLM", + "color": "#64B5F6", + "baseClasses": [ + "LLM" + ], + "category": "Agent Flows", + "description": "Large language models to analyze user-provided inputs and generate responses", + "inputParams": [ + { + "label": "Model", + "name": "llmModel", + "type": "asyncOptions", + "loadMethod": "listModels", + "loadConfig": true, + "id": "llmAgentflow_1-input-llmModel-asyncOptions", + "display": true + }, + { + "label": "Messages", + "name": "llmMessages", + "type": "array", + "optional": true, + "acceptVariable": true, + "array": [ + { + "label": "Role", + "name": "role", + "type": "options", + "options": [ + { + "label": "System", + "name": "system" + }, + { + "label": "Assistant", + "name": "assistant" + }, + { + "label": "Developer", + "name": "developer" + }, + { + "label": "User", + "name": "user" + } + ] + }, + { + "label": "Content", + "name": "content", + "type": "string", + "acceptVariable": true, + "generateInstruction": true, + "rows": 4 + } + ], + "id": "llmAgentflow_1-input-llmMessages-array", + "display": true + }, + { + "label": "Enable Memory", + "name": "llmEnableMemory", + "type": "boolean", + "description": "Enable memory for 
the conversation thread", + "default": true, + "optional": true, + "id": "llmAgentflow_1-input-llmEnableMemory-boolean", + "display": true + }, + { + "label": "Memory Type", + "name": "llmMemoryType", + "type": "options", + "options": [ + { + "label": "All Messages", + "name": "allMessages", + "description": "Retrieve all messages from the conversation" + }, + { + "label": "Window Size", + "name": "windowSize", + "description": "Uses a fixed window size to surface the last N messages" + }, + { + "label": "Conversation Summary", + "name": "conversationSummary", + "description": "Summarizes the whole conversation" + }, + { + "label": "Conversation Summary Buffer", + "name": "conversationSummaryBuffer", + "description": "Summarize conversations once token limit is reached. Default to 2000" + } + ], + "optional": true, + "default": "allMessages", + "show": { + "llmEnableMemory": true + }, + "id": "llmAgentflow_1-input-llmMemoryType-options", + "display": true + }, + { + "label": "Window Size", + "name": "llmMemoryWindowSize", + "type": "number", + "default": "20", + "description": "Uses a fixed window size to surface the last N messages", + "show": { + "llmMemoryType": "windowSize" + }, + "id": "llmAgentflow_1-input-llmMemoryWindowSize-number", + "display": false + }, + { + "label": "Max Token Limit", + "name": "llmMemoryMaxTokenLimit", + "type": "number", + "default": "2000", + "description": "Summarize conversations once token limit is reached. 
Default to 2000", + "show": { + "llmMemoryType": "conversationSummaryBuffer" + }, + "id": "llmAgentflow_1-input-llmMemoryMaxTokenLimit-number", + "display": false + }, + { + "label": "Input Message", + "name": "llmUserMessage", + "type": "string", + "description": "Add an input message as user message at the end of the conversation", + "rows": 4, + "optional": true, + "acceptVariable": true, + "show": { + "llmEnableMemory": true + }, + "id": "llmAgentflow_1-input-llmUserMessage-string", + "display": true + }, + { + "label": "Return Response As", + "name": "llmReturnResponseAs", + "type": "options", + "options": [ + { + "label": "User Message", + "name": "userMessage" + }, + { + "label": "Assistant Message", + "name": "assistantMessage" + } + ], + "default": "userMessage", + "id": "llmAgentflow_1-input-llmReturnResponseAs-options", + "display": true + }, + { + "label": "JSON Structured Output", + "name": "llmStructuredOutput", + "description": "Instruct the LLM to give output in a JSON structured schema", + "type": "array", + "optional": true, + "acceptVariable": true, + "array": [ + { + "label": "Key", + "name": "key", + "type": "string" + }, + { + "label": "Type", + "name": "type", + "type": "options", + "options": [ + { + "label": "String", + "name": "string" + }, + { + "label": "String Array", + "name": "stringArray" + }, + { + "label": "Number", + "name": "number" + }, + { + "label": "Boolean", + "name": "boolean" + }, + { + "label": "Enum", + "name": "enum" + }, + { + "label": "JSON Array", + "name": "jsonArray" + } + ] + }, + { + "label": "Enum Values", + "name": "enumValues", + "type": "string", + "placeholder": "value1, value2, value3", + "description": "Enum values. 
Separated by comma", + "optional": true, + "show": { + "llmStructuredOutput[$index].type": "enum" + } + }, + { + "label": "JSON Schema", + "name": "jsonSchema", + "type": "code", + "placeholder": "{\n \"answer\": {\n \"type\": \"string\",\n \"description\": \"Value of the answer\"\n },\n \"reason\": {\n \"type\": \"string\",\n \"description\": \"Reason for the answer\"\n },\n \"optional\": {\n \"type\": \"boolean\"\n },\n \"count\": {\n \"type\": \"number\"\n },\n \"children\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"value\": {\n \"type\": \"string\",\n \"description\": \"Value of the children's answer\"\n }\n }\n }\n }\n}", + "description": "JSON schema for the structured output", + "optional": true, + "show": { + "llmStructuredOutput[$index].type": "jsonArray" + } + }, + { + "label": "Description", + "name": "description", + "type": "string", + "placeholder": "Description of the key" + } + ], + "id": "llmAgentflow_1-input-llmStructuredOutput-array", + "display": true + }, + { + "label": "Update Flow State", + "name": "llmUpdateState", + "description": "Update runtime state during the execution of the workflow", + "type": "array", + "optional": true, + "acceptVariable": true, + "array": [ + { + "label": "Key", + "name": "key", + "type": "asyncOptions", + "loadMethod": "listRuntimeStateKeys", + "freeSolo": true + }, + { + "label": "Value", + "name": "value", + "type": "string", + "acceptVariable": true, + "acceptNodeOutputAsVariable": true + } + ], + "id": "llmAgentflow_1-input-llmUpdateState-array", + "display": true + } + ], + "inputAnchors": [], + "inputs": { + "llmModel": "chatOpenAICustom", + "llmMessages": [ + { + "role": "system", + "content": "

You are a full stack web developer.

Follow the instructions provided by the supervisor node.

" + } + ], + "llmEnableMemory": true, + "llmMemoryType": "allMessages", + "llmUserMessage": "

Supervisor instructions:

{{ $flow.state.instructions }}

", + "llmReturnResponseAs": "userMessage", + "llmStructuredOutput": "", + "llmUpdateState": "", + "llmModelConfig": { + "cache": "", + "modelName": "your-model-id", + "temperature": 0.9, + "streaming": true, + "maxTokens": "", + "topP": "", + "frequencyPenalty": "", + "presencePenalty": "", + "timeout": "", + "basepath": "your-model-endpoint", + "baseOptions": "", + "llmModel": "chatOpenAICustom" + } + }, + "outputAnchors": [ + { + "id": "llmAgentflow_1-output-llmAgentflow", + "label": "LLM", + "name": "llmAgentflow" + } + ], + "outputs": {}, + "selected": false + }, + "type": "agentFlow", + "width": 293, + "height": 72, + "selected": false, + "dragging": false, + "positionAbsolute": { + "x": 874.4752250712392, + "y": -110.49146731274378 + } + }, + { + "id": "loopAgentflow_0", + "position": { + "x": 1140.2828506850096, + "y": -241.9892448204126 + }, + "data": { + "id": "loopAgentflow_0", + "label": "Loop to Supervisor", + "version": 1, + "name": "loopAgentflow", + "type": "Loop", + "color": "#FFA07A", + "hideOutput": true, + "baseClasses": [ + "Loop" + ], + "category": "Agent Flows", + "description": "Loop back to a previous node", + "inputParams": [ + { + "label": "Loop Back To", + "name": "loopBackToNode", + "type": "asyncOptions", + "loadMethod": "listPreviousNodes", + "freeSolo": true, + "id": "loopAgentflow_0-input-loopBackToNode-asyncOptions", + "display": true + }, + { + "label": "Max Loop Count", + "name": "maxLoopCount", + "type": "number", + "default": 5, + "id": "loopAgentflow_0-input-maxLoopCount-number", + "display": true + } + ], + "inputAnchors": [], + "inputs": { + "loopBackToNode": "llmAgentflow_0-Supervisor", + "maxLoopCount": "10" + }, + "outputAnchors": [], + "outputs": {}, + "selected": false + }, + "type": "agentFlow", + "width": 195, + "height": 66, + "selected": false, + "dragging": false, + "positionAbsolute": { + "x": 1140.2828506850096, + "y": -241.9892448204126 + } + }, + { + "id": "llmAgentflow_2", + "position": { + "x": 
872.3399689285774, + "y": 50.198281169132585 + }, + "data": { + "id": "llmAgentflow_2", + "label": "Software Reviewer", + "version": 1, + "name": "llmAgentflow", + "type": "LLM", + "color": "#64B5F6", + "baseClasses": [ + "LLM" + ], + "category": "Agent Flows", + "description": "Large language models to analyze user-provided inputs and generate responses", + "inputParams": [ + { + "label": "Model", + "name": "llmModel", + "type": "asyncOptions", + "loadMethod": "listModels", + "loadConfig": true, + "id": "llmAgentflow_2-input-llmModel-asyncOptions", + "display": true + }, + { + "label": "Messages", + "name": "llmMessages", + "type": "array", + "optional": true, + "acceptVariable": true, + "array": [ + { + "label": "Role", + "name": "role", + "type": "options", + "options": [ + { + "label": "System", + "name": "system" + }, + { + "label": "Assistant", + "name": "assistant" + }, + { + "label": "Developer", + "name": "developer" + }, + { + "label": "User", + "name": "user" + } + ] + }, + { + "label": "Content", + "name": "content", + "type": "string", + "acceptVariable": true, + "generateInstruction": true, + "rows": 4 + } + ], + "id": "llmAgentflow_2-input-llmMessages-array", + "display": true + }, + { + "label": "Enable Memory", + "name": "llmEnableMemory", + "type": "boolean", + "description": "Enable memory for the conversation thread", + "default": true, + "optional": true, + "id": "llmAgentflow_2-input-llmEnableMemory-boolean", + "display": true + }, + { + "label": "Memory Type", + "name": "llmMemoryType", + "type": "options", + "options": [ + { + "label": "All Messages", + "name": "allMessages", + "description": "Retrieve all messages from the conversation" + }, + { + "label": "Window Size", + "name": "windowSize", + "description": "Uses a fixed window size to surface the last N messages" + }, + { + "label": "Conversation Summary", + "name": "conversationSummary", + "description": "Summarizes the whole conversation" + }, + { + "label": "Conversation Summary 
Buffer", + "name": "conversationSummaryBuffer", + "description": "Summarize conversations once token limit is reached. Default to 2000" + } + ], + "optional": true, + "default": "allMessages", + "show": { + "llmEnableMemory": true + }, + "id": "llmAgentflow_2-input-llmMemoryType-options", + "display": true + }, + { + "label": "Window Size", + "name": "llmMemoryWindowSize", + "type": "number", + "default": "20", + "description": "Uses a fixed window size to surface the last N messages", + "show": { + "llmMemoryType": "windowSize" + }, + "id": "llmAgentflow_2-input-llmMemoryWindowSize-number", + "display": false + }, + { + "label": "Max Token Limit", + "name": "llmMemoryMaxTokenLimit", + "type": "number", + "default": "2000", + "description": "Summarize conversations once token limit is reached. Default to 2000", + "show": { + "llmMemoryType": "conversationSummaryBuffer" + }, + "id": "llmAgentflow_2-input-llmMemoryMaxTokenLimit-number", + "display": false + }, + { + "label": "Input Message", + "name": "llmUserMessage", + "type": "string", + "description": "Add an input message as user message at the end of the conversation", + "rows": 4, + "optional": true, + "acceptVariable": true, + "show": { + "llmEnableMemory": true + }, + "id": "llmAgentflow_2-input-llmUserMessage-string", + "display": true + }, + { + "label": "Return Response As", + "name": "llmReturnResponseAs", + "type": "options", + "options": [ + { + "label": "User Message", + "name": "userMessage" + }, + { + "label": "Assistant Message", + "name": "assistantMessage" + } + ], + "default": "userMessage", + "id": "llmAgentflow_2-input-llmReturnResponseAs-options", + "display": true + }, + { + "label": "JSON Structured Output", + "name": "llmStructuredOutput", + "description": "Instruct the LLM to give output in a JSON structured schema", + "type": "array", + "optional": true, + "acceptVariable": true, + "array": [ + { + "label": "Key", + "name": "key", + "type": "string" + }, + { + "label": "Type", + "name": 
"type", + "type": "options", + "options": [ + { + "label": "String", + "name": "string" + }, + { + "label": "String Array", + "name": "stringArray" + }, + { + "label": "Number", + "name": "number" + }, + { + "label": "Boolean", + "name": "boolean" + }, + { + "label": "Enum", + "name": "enum" + }, + { + "label": "JSON Array", + "name": "jsonArray" + } + ] + }, + { + "label": "Enum Values", + "name": "enumValues", + "type": "string", + "placeholder": "value1, value2, value3", + "description": "Enum values. Separated by comma", + "optional": true, + "show": { + "llmStructuredOutput[$index].type": "enum" + } + }, + { + "label": "JSON Schema", + "name": "jsonSchema", + "type": "code", + "placeholder": "{\n \"answer\": {\n \"type\": \"string\",\n \"description\": \"Value of the answer\"\n },\n \"reason\": {\n \"type\": \"string\",\n \"description\": \"Reason for the answer\"\n },\n \"optional\": {\n \"type\": \"boolean\"\n },\n \"count\": {\n \"type\": \"number\"\n },\n \"children\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"value\": {\n \"type\": \"string\",\n \"description\": \"Value of the children's answer\"\n }\n }\n }\n }\n}", + "description": "JSON schema for the structured output", + "optional": true, + "show": { + "llmStructuredOutput[$index].type": "jsonArray" + } + }, + { + "label": "Description", + "name": "description", + "type": "string", + "placeholder": "Description of the key" + } + ], + "id": "llmAgentflow_2-input-llmStructuredOutput-array", + "display": true + }, + { + "label": "Update Flow State", + "name": "llmUpdateState", + "description": "Update runtime state during the execution of the workflow", + "type": "array", + "optional": true, + "acceptVariable": true, + "array": [ + { + "label": "Key", + "name": "key", + "type": "asyncOptions", + "loadMethod": "listRuntimeStateKeys", + "freeSolo": true + }, + { + "label": "Value", + "name": "value", + "type": "string", + "acceptVariable": true, + 
"acceptNodeOutputAsVariable": true + } + ], + "id": "llmAgentflow_2-input-llmUpdateState-array", + "display": true + } + ], + "inputAnchors": [], + "inputs": { + "llmModel": "chatOpenAICustom", + "llmMessages": [ + { + "role": "system", + "content": "

As a Quality Assurance Engineer, you are an integral part of our development team, ensuring that our software products are of the highest quality. Your meticulous attention to detail and expertise in testing methodologies are crucial in identifying defects and ensuring that our code meets the highest standards.

Your goal is to ensure the delivery of high-quality software through thorough code review and testing.

Review the codebase for the new feature designed and implemented by the Senior Software Engineer. Your expertise goes beyond mere code inspection; you are adept at ensuring that developments not only function as intended but also adhere to the team's coding standards, enhance maintainability, and seamlessly integrate with existing systems.

With a deep appreciation for collaborative development, you provide constructive feedback, guiding contributors towards best practices and fostering a culture of continuous improvement. Your meticulous approach to reviewing code, coupled with your ability to foresee potential issues and recommend proactive solutions, ensures the delivery of high-quality software that is robust, scalable, and aligned with the team's strategic goals.

Always pass back the review and feedback to the Senior Software Engineer.

" + } + ], + "llmEnableMemory": true, + "llmMemoryType": "allMessages", + "llmUserMessage": "

Supervisor instructions:

{{ $flow.state.instructions }}

", + "llmReturnResponseAs": "userMessage", + "llmStructuredOutput": "", + "llmUpdateState": "", + "llmModelConfig": { + "cache": "", + "modelName": "your-model-id", + "temperature": 0.9, + "streaming": true, + "maxTokens": "", + "topP": "", + "frequencyPenalty": "", + "presencePenalty": "", + "timeout": "", + "basepath": "your-model-endpoint", + "baseOptions": "", + "llmModel": "chatOpenAICustom" + } + }, + "outputAnchors": [ + { + "id": "llmAgentflow_2-output-llmAgentflow", + "label": "LLM", + "name": "llmAgentflow" + } + ], + "outputs": {}, + "selected": false + }, + "type": "agentFlow", + "width": 293, + "height": 72, + "selected": false, + "dragging": false, + "positionAbsolute": { + "x": 872.3399689285774, + "y": 50.198281169132585 + } + }, + { + "id": "loopAgentflow_3", + "position": { + "x": 1240.5799084758846, + "y": 40.42263081949608 + }, + "data": { + "id": "loopAgentflow_3", + "label": "Loop to Supervisor", + "version": 1, + "name": "loopAgentflow", + "type": "Loop", + "color": "#FFA07A", + "hideOutput": true, + "baseClasses": [ + "Loop" + ], + "category": "Agent Flows", + "description": "Loop back to a previous node", + "inputParams": [ + { + "label": "Loop Back To", + "name": "loopBackToNode", + "type": "asyncOptions", + "loadMethod": "listPreviousNodes", + "freeSolo": true, + "id": "loopAgentflow_3-input-loopBackToNode-asyncOptions", + "display": true + }, + { + "label": "Max Loop Count", + "name": "maxLoopCount", + "type": "number", + "default": 5, + "id": "loopAgentflow_3-input-maxLoopCount-number", + "display": true + } + ], + "inputAnchors": [], + "inputs": { + "loopBackToNode": "llmAgentflow_0-Supervisor", + "maxLoopCount": "10" + }, + "outputAnchors": [], + "outputs": {}, + "selected": false + }, + "type": "agentFlow", + "width": 195, + "height": 66, + "selected": false, + "dragging": false, + "positionAbsolute": { + "x": 1240.5799084758846, + "y": 40.42263081949608 + } + }, + { + "id": "llmAgentflow_3", + "position": { + "x": 882.2598467634614, 
+ "y": 238.1730646364603 + }, + "data": { + "id": "llmAgentflow_3", + "label": "Generate Final Response", + "version": 1, + "name": "llmAgentflow", + "type": "LLM", + "color": "#64B5F6", + "baseClasses": [ + "LLM" + ], + "category": "Agent Flows", + "description": "Large language models to analyze user-provided inputs and generate responses", + "inputParams": [ + { + "label": "Model", + "name": "llmModel", + "type": "asyncOptions", + "loadMethod": "listModels", + "loadConfig": true, + "id": "llmAgentflow_3-input-llmModel-asyncOptions", + "display": true + }, + { + "label": "Messages", + "name": "llmMessages", + "type": "array", + "optional": true, + "acceptVariable": true, + "array": [ + { + "label": "Role", + "name": "role", + "type": "options", + "options": [ + { + "label": "System", + "name": "system" + }, + { + "label": "Assistant", + "name": "assistant" + }, + { + "label": "Developer", + "name": "developer" + }, + { + "label": "User", + "name": "user" + } + ] + }, + { + "label": "Content", + "name": "content", + "type": "string", + "acceptVariable": true, + "generateInstruction": true, + "rows": 4 + } + ], + "id": "llmAgentflow_3-input-llmMessages-array", + "display": true + }, + { + "label": "Enable Memory", + "name": "llmEnableMemory", + "type": "boolean", + "description": "Enable memory for the conversation thread", + "default": true, + "optional": true, + "id": "llmAgentflow_3-input-llmEnableMemory-boolean", + "display": true + }, + { + "label": "Memory Type", + "name": "llmMemoryType", + "type": "options", + "options": [ + { + "label": "All Messages", + "name": "allMessages", + "description": "Retrieve all messages from the conversation" + }, + { + "label": "Window Size", + "name": "windowSize", + "description": "Uses a fixed window size to surface the last N messages" + }, + { + "label": "Conversation Summary", + "name": "conversationSummary", + "description": "Summarizes the whole conversation" + }, + { + "label": "Conversation Summary Buffer", + 
"name": "conversationSummaryBuffer", + "description": "Summarize conversations once token limit is reached. Default to 2000" + } + ], + "optional": true, + "default": "allMessages", + "show": { + "llmEnableMemory": true + }, + "id": "llmAgentflow_3-input-llmMemoryType-options", + "display": true + }, + { + "label": "Window Size", + "name": "llmMemoryWindowSize", + "type": "number", + "default": "20", + "description": "Uses a fixed window size to surface the last N messages", + "show": { + "llmMemoryType": "windowSize" + }, + "id": "llmAgentflow_3-input-llmMemoryWindowSize-number", + "display": false + }, + { + "label": "Max Token Limit", + "name": "llmMemoryMaxTokenLimit", + "type": "number", + "default": "2000", + "description": "Summarize conversations once token limit is reached. Default to 2000", + "show": { + "llmMemoryType": "conversationSummaryBuffer" + }, + "id": "llmAgentflow_3-input-llmMemoryMaxTokenLimit-number", + "display": false + }, + { + "label": "Input Message", + "name": "llmUserMessage", + "type": "string", + "description": "Add an input message as user message at the end of the conversation", + "rows": 4, + "optional": true, + "acceptVariable": true, + "show": { + "llmEnableMemory": true + }, + "id": "llmAgentflow_3-input-llmUserMessage-string", + "display": true + }, + { + "label": "Return Response As", + "name": "llmReturnResponseAs", + "type": "options", + "options": [ + { + "label": "User Message", + "name": "userMessage" + }, + { + "label": "Assistant Message", + "name": "assistantMessage" + } + ], + "default": "userMessage", + "id": "llmAgentflow_3-input-llmReturnResponseAs-options", + "display": true + }, + { + "label": "JSON Structured Output", + "name": "llmStructuredOutput", + "description": "Instruct the LLM to give output in a JSON structured schema", + "type": "array", + "optional": true, + "acceptVariable": true, + "array": [ + { + "label": "Key", + "name": "key", + "type": "string" + }, + { + "label": "Type", + "name": "type", + 
"type": "options", + "options": [ + { + "label": "String", + "name": "string" + }, + { + "label": "String Array", + "name": "stringArray" + }, + { + "label": "Number", + "name": "number" + }, + { + "label": "Boolean", + "name": "boolean" + }, + { + "label": "Enum", + "name": "enum" + }, + { + "label": "JSON Array", + "name": "jsonArray" + } + ] + }, + { + "label": "Enum Values", + "name": "enumValues", + "type": "string", + "placeholder": "value1, value2, value3", + "description": "Enum values. Separated by comma", + "optional": true, + "show": { + "llmStructuredOutput[$index].type": "enum" + } + }, + { + "label": "JSON Schema", + "name": "jsonSchema", + "type": "code", + "placeholder": "{\n \"answer\": {\n \"type\": \"string\",\n \"description\": \"Value of the answer\"\n },\n \"reason\": {\n \"type\": \"string\",\n \"description\": \"Reason for the answer\"\n },\n \"optional\": {\n \"type\": \"boolean\"\n },\n \"count\": {\n \"type\": \"number\"\n },\n \"children\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"value\": {\n \"type\": \"string\",\n \"description\": \"Value of the children's answer\"\n }\n }\n }\n }\n}", + "description": "JSON schema for the structured output", + "optional": true, + "show": { + "llmStructuredOutput[$index].type": "jsonArray" + } + }, + { + "label": "Description", + "name": "description", + "type": "string", + "placeholder": "Description of the key" + } + ], + "id": "llmAgentflow_3-input-llmStructuredOutput-array", + "display": true + }, + { + "label": "Update Flow State", + "name": "llmUpdateState", + "description": "Update runtime state during the execution of the workflow", + "type": "array", + "optional": true, + "acceptVariable": true, + "array": [ + { + "label": "Key", + "name": "key", + "type": "asyncOptions", + "loadMethod": "listRuntimeStateKeys", + "freeSolo": true + }, + { + "label": "Value", + "name": "value", + "type": "string", + "acceptVariable": true, + 
"acceptNodeOutputAsVariable": true + } + ], + "id": "llmAgentflow_3-input-llmUpdateState-array", + "display": true + } + ], + "inputAnchors": [], + "inputs": { + "llmModel": "chatOpenAICustom", + "llmMessages": [], + "llmEnableMemory": true, + "llmMemoryType": "allMessages", + "llmUserMessage": "

Given the above conversations, generate a detailed solution developed by the Software Engineer and the code reviewer.

Include full code, improvements, and reviews.

", + "llmReturnResponseAs": "userMessage", + "llmStructuredOutput": "", + "llmUpdateState": "", + "llmModelConfig": { + "cache": "", + "modelName": "your-model-id", + "temperature": 0.9, + "streaming": true, + "maxTokens": "", + "topP": "", + "frequencyPenalty": "", + "presencePenalty": "", + "timeout": "", + "basepath": "your-model-endpoint", + "baseOptions": "", + "llmModel": "chatOpenAICustom" + } + }, + "outputAnchors": [ + { + "id": "llmAgentflow_3-output-llmAgentflow", + "label": "LLM", + "name": "llmAgentflow" + } + ], + "outputs": {}, + "selected": false + }, + "type": "agentFlow", + "width": 293, + "height": 72, + "selected": false, + "dragging": false, + "positionAbsolute": { + "x": 882.2598467634614, + "y": 238.1730646364603 + } + } + ], + "edges": [ + { + "source": "startAgentflow_0", + "sourceHandle": "startAgentflow_0-output-startAgentflow", + "target": "llmAgentflow_0", + "targetHandle": "llmAgentflow_0", + "data": { + "sourceColor": "#7EE787", + "targetColor": "#64B5F6", + "isHumanInput": false + }, + "type": "agentFlow", + "id": "startAgentflow_0-startAgentflow_0-output-startAgentflow-llmAgentflow_0-llmAgentflow_0" + }, + { + "source": "llmAgentflow_0", + "sourceHandle": "llmAgentflow_0-output-llmAgentflow", + "target": "conditionAgentflow_0", + "targetHandle": "conditionAgentflow_0", + "data": { + "sourceColor": "#64B5F6", + "targetColor": "#FFB938", + "isHumanInput": false + }, + "type": "agentFlow", + "id": "llmAgentflow_0-llmAgentflow_0-output-llmAgentflow-conditionAgentflow_0-conditionAgentflow_0" + }, + { + "source": "conditionAgentflow_0", + "sourceHandle": "conditionAgentflow_0-output-0", + "target": "llmAgentflow_1", + "targetHandle": "llmAgentflow_1", + "data": { + "sourceColor": "#FFB938", + "targetColor": "#64B5F6", + "edgeLabel": "0", + "isHumanInput": false + }, + "type": "agentFlow", + "id": "conditionAgentflow_0-conditionAgentflow_0-output-0-llmAgentflow_1-llmAgentflow_1" + }, + { + "source": "llmAgentflow_1", + "sourceHandle": 
"llmAgentflow_1-output-llmAgentflow", + "target": "loopAgentflow_0", + "targetHandle": "loopAgentflow_0", + "data": { + "sourceColor": "#64B5F6", + "targetColor": "#FFA07A", + "isHumanInput": false + }, + "type": "agentFlow", + "id": "llmAgentflow_1-llmAgentflow_1-output-llmAgentflow-loopAgentflow_0-loopAgentflow_0" + }, + { + "source": "llmAgentflow_2", + "sourceHandle": "llmAgentflow_2-output-llmAgentflow", + "target": "loopAgentflow_3", + "targetHandle": "loopAgentflow_3", + "data": { + "sourceColor": "#64B5F6", + "targetColor": "#FFA07A", + "isHumanInput": false + }, + "type": "agentFlow", + "id": "llmAgentflow_2-llmAgentflow_2-output-llmAgentflow-loopAgentflow_3-loopAgentflow_3" + }, + { + "source": "conditionAgentflow_0", + "sourceHandle": "conditionAgentflow_0-output-1", + "target": "llmAgentflow_2", + "targetHandle": "llmAgentflow_2", + "data": { + "sourceColor": "#FFB938", + "targetColor": "#64B5F6", + "edgeLabel": "1", + "isHumanInput": false + }, + "type": "agentFlow", + "id": "conditionAgentflow_0-conditionAgentflow_0-output-1-llmAgentflow_2-llmAgentflow_2" + }, + { + "source": "conditionAgentflow_0", + "sourceHandle": "conditionAgentflow_0-output-2", + "target": "llmAgentflow_3", + "targetHandle": "llmAgentflow_3", + "data": { + "sourceColor": "#FFB938", + "targetColor": "#64B5F6", + "edgeLabel": "2", + "isHumanInput": false + }, + "type": "agentFlow", + "id": "conditionAgentflow_0-conditionAgentflow_0-output-2-llmAgentflow_3-llmAgentflow_3" + } + ] +} \ No newline at end of file diff --git a/plugins/agenticai/vars/agenticai-plugin-vars.yml b/plugins/agenticai/vars/agenticai-plugin-vars.yml new file mode 100644 index 00000000..80bba758 --- /dev/null +++ b/plugins/agenticai/vars/agenticai-plugin-vars.yml @@ -0,0 +1,72 @@ +--- +# Copyright (C) 2024-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +############################################################################## +# Agentic AI Plugin - Internal Variables 
+############################################################################## +# These are internal implementation variables for the Agentic AI plugin. +# Users should not need to modify these values. +# Currently implements: Flowise (visual AI agent builder) +############################################################################## + +# Namespace +agenticai_namespace: flowise + +# Image Configuration +agenticai_image_repository: flowiseai/flowise +agenticai_image_tag: "3.0.12" +agenticai_replica_count: 1 + +# Database Configuration (PostgreSQL) +agenticai_database_type: postgres +agenticai_postgres_enabled: true +agenticai_postgres_version: 15 +agenticai_postgres_port: 5432 +agenticai_postgres_database: flowisedb +agenticai_postgres_username: flowise +agenticai_postgres_storage_size: 5Gi + +# Redis Configuration +agenticai_redis_enabled: true +agenticai_redis_port: 6379 +agenticai_redis_storage_size: 2Gi + +# Worker Configuration +agenticai_worker_enabled: true +agenticai_worker_replica_count: 1 +agenticai_worker_cpu_request: 500m +agenticai_worker_memory_request: 1Gi +agenticai_worker_cpu_limit: 1 +agenticai_worker_memory_limit: 2Gi + +# Application Resources +agenticai_cpu_request: 500m +agenticai_memory_request: 1Gi +agenticai_cpu_limit: 1 +agenticai_memory_limit: 2Gi + +# Ingress Configuration +agenticai_ingress_enabled: true +agenticai_ingress_class: nginx +agenticai_ingress_path: / +agenticai_tls_enabled: true + +# Storage Configuration +agenticai_persistence_enabled: true +agenticai_persistence_storage_class: local-path +agenticai_persistence_access_mode: ReadWriteOnce +agenticai_persistence_size: 10Gi + +# Helm Chart Configuration +agenticai_helm_repo_name: cowboysysop +agenticai_helm_repo_url: https://cowboysysop.github.io/charts/ +agenticai_helm_chart_name: flowise +agenticai_helm_chart_version: "6.0.0" + +# Autoscaling (disabled by default for simplicity) +agenticai_autoscaling_enabled: false +agenticai_autoscaling_min_replicas: 1 
+agenticai_autoscaling_max_replicas: 3 +agenticai_autoscaling_target_cpu: 80 +agenticai_autoscaling_target_memory: 80 From e7b84b839e597c250566c08ff3c6a0a6a94fbfd3 Mon Sep 17 00:00:00 2001 From: Harika Date: Wed, 18 Feb 2026 11:56:25 -0600 Subject: [PATCH 33/35] update custom iso readme Signed-off-by: Harika --- .../EI/single-node/user-guide-apisix.md | 8 ++--- .../EI/single-node/user-guide-genai.md | 6 ++-- .../Dell/ubuntu-22.04/iac/iso/README.md | 29 +++++++++++++++---- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md index 723eb1cd..b00b40c8 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-apisix.md @@ -181,11 +181,11 @@ deploy_istio=off uninstall_ceph=off ``` -To support non-interactive execution of inference-stack-deploy.sh, create a file named "core/inentory/.become-passfile" with your user's sudo password: +To support non-interactive execution of inference-stack-deploy.sh, create a file named "core/inventory/.become-passfile" with your user's sudo password: ```bash -vi core/inentory/.become-passfile -chmod 600 core/inentory/.become-passfile +vi core/inventory/.become-passfile +chmod 600 core/inventory/.become-passfile ``` **Update hosts.yaml File** @@ -266,7 +266,7 @@ If a valid token is returned (long JWT string), the environment is ready for inf > Note: Replace ${BASE_URL} with your DNS ```bash -curl -k ${BASE_URL}/Llama-3.1-8B-Instruct/v1/completions \ +curl -k https://${BASE_URL}/Llama-3.1-8B-Instruct/v1/completions \ -X POST \ -d '{"model": "meta-llama/Llama-3.1-8B-Instruct", "prompt": "What is Deep Learning?", "max_tokens": 25, "temperature": 0}' \ -H 'Content-Type: application/json' \ diff --git a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md 
b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md index 12fdd7a2..b362feaa 100644 --- a/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md +++ b/third_party/Dell/ubuntu-22.04/EI/single-node/user-guide-genai.md @@ -225,7 +225,7 @@ Reference the litellm_master_key file under core/inventory/metadata/vault.yml fo **Run a test query for Gaudi:** ```bash -curl -k ${BASE_URL}/Llama-3.1-8B-Instruct/v1/completions \ +curl -k https://${BASE_URL}/v1/completions \ -X POST \ -H "Content-Type: application/json" \ -H "Authorization: Bearer <>" \ @@ -239,8 +239,8 @@ curl -k ${BASE_URL}/Llama-3.1-8B-Instruct/v1/completions \ **Run a test query for CPU:** ```bash -curl -k ${BASE_URL}/Llama-3.1-8B-Instruct-vllmcpu/v1/completions \ --X POST \ +curl -k https://${BASE_URL}/v1/completions \ + -X POST \ -H "Content-Type: application/json" \ -H "Authorization: Bearer <>" \ -d '{ diff --git a/third_party/Dell/ubuntu-22.04/iac/iso/README.md b/third_party/Dell/ubuntu-22.04/iac/iso/README.md index 7e148f90..28d8eb92 100644 --- a/third_party/Dell/ubuntu-22.04/iac/iso/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/iso/README.md @@ -37,21 +37,38 @@ brew install xorriso rsync ## Usage -The script requires `--hostname`, `--username`, and `--password-hash`. All other values have defaults. +### Change permission to your file +```bash +chmod +x custom-iso.sh +``` + +**Use this if you do not already have the Ubuntu ISO downloaded** + +set `--hostname`, `--username`, `--password-hash`. All other values have defaults. + +The script will automatically download ubuntu iso. 
+```bash +sudo ./custom-iso.sh \ + --hostname=ubuntu-server-001 \ + --username=user \ + --password-hash=Replace-with-your-password-hash +``` + -Basic example using a local ISO: +**Use this if you already have downloaded ubuntu ISO locally:** ```bash -./custom-iso.sh \ +sudo ./custom-iso.sh \ --hostname=ubuntu-server-001 \ --username=user \ - --iso=./ubuntu-22.04.5-live-server-amd64.iso + --password-hash=Replace-with-your-password-hash \ + --iso=Replace-with-path-to-your-local-iso ``` Full example with all options: ```bash -./custom-iso.sh \ +sudo ./custom-iso.sh \ --hostname=ubuntu-server-001 \ --username=user \ --password-hash='$6$Sl0xydNgA3rBk1Uo$Pj7oVVI7smkdBh20V8EyLivWpKDHFueUhvrfwxundGp/DQrAuTHjIxnrCZIMVJ1zcTIJ7VgIWKu0mUZmiRsqv0' \ @@ -63,7 +80,6 @@ Full example with all options: --storage-layout=direct \ --instance-id=ubuntu-server-001 \ --packages='' \ - --iso=./ubuntu-22.04.5-live-server-amd64.iso \ --iso-url=https://releases.ubuntu.com/jammy/ubuntu-22.04.5-live-server-amd64.iso \ --iso-name=ubuntu-22.04.5-live-server-amd64.iso \ --out-iso=ubuntu-22.04.5-autoinstall.iso \ @@ -72,6 +88,7 @@ Full example with all options: Notes: - If `--iso` is provided, `--iso-url` and `--iso-name` are ignored. +- If `--iso` is not provided, the script downloads the ISO using --iso-url. - If `--ssh-key` is empty, no SSH key is embedded. - `--packages` is a comma-separated list (e.g., `--packages=openssh-server,curl`). - `--volid` must be 32 characters or fewer. 
From 296ad49f61d04129c469ff955f114f3d1caa16f1 Mon Sep 17 00:00:00 2001 From: Harika Date: Wed, 18 Feb 2026 12:03:20 -0600 Subject: [PATCH 34/35] update custom iso readme Signed-off-by: Harika --- third_party/Dell/ubuntu-22.04/iac/iso/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/Dell/ubuntu-22.04/iac/iso/README.md b/third_party/Dell/ubuntu-22.04/iac/iso/README.md index 28d8eb92..d5f0129c 100644 --- a/third_party/Dell/ubuntu-22.04/iac/iso/README.md +++ b/third_party/Dell/ubuntu-22.04/iac/iso/README.md @@ -41,6 +41,7 @@ brew install xorriso rsync ```bash chmod +x custom-iso.sh ``` +> Note: make sure to run the custom-iso.sh script with sudo privileges. **Use this if you do not already have the Ubuntu ISO downloaded** From efcdff447253c309e7b807f6ae8b6a5745e4a84b Mon Sep 17 00:00:00 2001 From: Harika Date: Thu, 19 Feb 2026 10:39:07 -0600 Subject: [PATCH 35/35] delete code-scan.yaml files Signed-off-by: Harika --- .github/workflows/code-scans.yaml | 167 ------------------------------ 1 file changed, 167 deletions(-) delete mode 100644 .github/workflows/code-scans.yaml diff --git a/.github/workflows/code-scans.yaml b/.github/workflows/code-scans.yaml deleted file mode 100644 index 3d37937e..00000000 --- a/.github/workflows/code-scans.yaml +++ /dev/null @@ -1,167 +0,0 @@ -name: SDLE Scans - -on: - workflow_dispatch: - inputs: - PR_number: - description: 'Pull request number' - required: true - push: - branches: [ main ] - pull_request: - types: [opened, synchronize, reopened, ready_for_review] - -concurrency: - group: sdle-${{ github.event.inputs.PR_number || github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -permissions: - contents: read - actions: read - -jobs: - -# ----------------------------- -# 1) Trivy Scan -# ----------------------------- - trivy_scan: - name: Trivy Vulnerability Scan - runs-on: self-hosted - steps: - - uses: actions/checkout@v4 - with: - ref: ${{ github.event.inputs.PR_number && 
format('refs/pull/{0}/merge', github.event.inputs.PR_number) || '' }} - - - name: Create report directory - run: mkdir -p trivy-reports - - - name: Run Trivy FS Scan - uses: aquasecurity/trivy-action@0.28.0 - continue-on-error: true - with: - scan-type: 'fs' - scan-ref: '.' - scanners: 'vuln,misconfig,secret' - severity: 'CRITICAL,HIGH' - format: 'table' - output: 'trivy-reports/trivy_scan_report.txt' - - - name: Run Trivy Image Scan - vllm-cpu - uses: aquasecurity/trivy-action@0.28.0 - continue-on-error: true - with: - scan-type: 'image' - image-ref: 'public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.2' - severity: 'HIGH,CRITICAL' - format: 'table' - output: 'trivy-reports/trivy-vllm-cpu.txt' - - - name: Upload Trivy Reports - if: always() - uses: actions/upload-artifact@v4 - with: - name: trivy-reports - path: trivy-reports/ - - - name: Show Trivy FS Report in Logs - if: always() - run: | - echo "========= TRIVY FS SCAN FINDINGS =========" - cat trivy-reports/trivy_scan_report.txt || echo "No FS scan report found" - echo "==========================================" - -# ----------------------------- -# 2) Bandit Scan -# ----------------------------- - bandit_scan: - name: Bandit security scan - runs-on: self-hosted - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.event.inputs.PR_number && format('refs/pull/{0}/merge', github.event.inputs.PR_number) || '' }} - submodules: 'recursive' - fetch-depth: 0 - - uses: actions/setup-python@v5 - with: - python-version: "3.x" - - name: Install Bandit - run: pip install bandit - - name: Create Bandit configuration - run: | - cat > .bandit << 'EOF' - [bandit] - exclude_dirs = tests,test,venv,.venv,node_modules - skips = B101 - EOF - shell: bash - - name: Run Bandit scan - run: | - bandit -r . -ll -iii -f screen - bandit -r . 
-ll -iii -f html -o bandit-report.html - - name: Upload Bandit Report - uses: actions/upload-artifact@v4 - with: - name: bandit-report - path: bandit-report.html - retention-days: 30 -# ----------------------------- -# 3) ShellCheck Scan -# ----------------------------- - shellcheck_scan: - name: ShellCheck script analysis - runs-on: self-hosted - steps: - - uses: actions/checkout@v4 - with: - ref: ${{ github.event.inputs.PR_number && format('refs/pull/{0}/merge', github.event.inputs.PR_number) || '' }} - - - name: Create report directory - run: mkdir -p shellcheck-reports - - - name: Install ShellCheck - run: | - # Check if shellcheck is already installed - if ! command -v shellcheck &> /dev/null; then - wget -qO- "https://github.com/koalaman/shellcheck/releases/download/stable/shellcheck-stable.linux.x86_64.tar.xz" | tar -xJv - sudo cp shellcheck-stable/shellcheck /usr/local/bin/ - rm -rf shellcheck-stable - fi - shellcheck --version - - - name: Find shell scripts - id: find_scripts - run: | - SCRIPT_COUNT=$(find . -type f -name "*.sh" ! -path "./.git/*" | wc -l) - echo "Shell scripts found: $SCRIPT_COUNT" - echo "script_count=$SCRIPT_COUNT" >> $GITHUB_OUTPUT - - - name: Run ShellCheck - if: steps.find_scripts.outputs.script_count > 0 - continue-on-error: true - run: | - echo "ShellCheck Analysis Report" > shellcheck-reports/shellcheck-report.txt - echo "==========================" >> shellcheck-reports/shellcheck-report.txt - echo "" >> shellcheck-reports/shellcheck-report.txt - - find . -type f -name "*.sh" ! 
-path "./.git/*" | while read -r script; do - echo "Checking: $script" >> shellcheck-reports/shellcheck-report.txt - shellcheck -f gcc "$script" >> shellcheck-reports/shellcheck-report.txt 2>&1 || true - echo "" >> shellcheck-reports/shellcheck-report.txt - done - - cat shellcheck-reports/shellcheck-report.txt - - - name: Create empty report if no scripts - if: steps.find_scripts.outputs.script_count == 0 - run: | - echo "ShellCheck Analysis Report" > shellcheck-reports/shellcheck-report.txt - echo "No shell scripts found to analyze." >> shellcheck-reports/shellcheck-report.txt - - - name: Upload ShellCheck Report - if: always() - uses: actions/upload-artifact@v4 - with: - name: shellcheck-report - path: shellcheck-reports/shellcheck-report.txt \ No newline at end of file