From 0f122b281f03667f832782548f7ea2717bf0918c Mon Sep 17 00:00:00 2001
From: Soren Dreano <soren@numind.ai>
Date: Tue, 19 Aug 2025 11:05:17 +0200
Subject: [PATCH] feat: add checkpoint_enabled parameter to the VLLM class

Expose a checkpoint_enabled parameter on VLLM and pass it through to the
super() call, as the parent ASGI class already supports it.
---
 sdk/src/beta9/abstractions/integrations/vllm.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/sdk/src/beta9/abstractions/integrations/vllm.py b/sdk/src/beta9/abstractions/integrations/vllm.py
index a5a2bfbd8..f387fa519 100644
--- a/sdk/src/beta9/abstractions/integrations/vllm.py
+++ b/sdk/src/beta9/abstractions/integrations/vllm.py
@@ -172,7 +172,7 @@ class VLLM(ASGI):
         vllm_version (str):
             The version of vLLM that will be installed from PyPI. As the configuration of the vLLM engine depends on the version of vLLM, using a non-default vllm_version might require subclassing VLLMArgs in order to add the missing configuration options. Default is version 0.8.4.
         huggingface_hub_version (str):
-            The version of huggingface_hub that will be installed from PyPI. Different versions of vLLM require different versions of huggingface_hub, thus using a non-default vLLM version might require using a non-default version of huggingface_hub.  Default is version 0.30.2.
+            The version of huggingface_hub that will be installed from PyPI. Different versions of vLLM require different versions of huggingface_hub, thus using a non-default vLLM version might require using a non-default version of huggingface_hub. Default is version 0.30.2.
         workers (int):
             The number of workers to run in the container. Default is 1.
         concurrent_requests (int):
@@ -194,6 +194,8 @@ class VLLM(ASGI):
             The secrets to pass to the container. If you need huggingface authentication to download models, you should set HF_TOKEN in the secrets.
         autoscaler (Autoscaler):
             The autoscaler to use. Default is a queue depth autoscaler.
+        checkpoint_enabled (bool):
+            Whether to enable checkpointing for the endpoint. Default is False. If enabled, the app will be checkpointed after the on_start function has completed. On the next invocation, each container will restore from the checkpoint and resume execution instead of performing a cold boot.
         vllm_args (VLLMArgs):
             The arguments for the vLLM model.
 
@@ -228,6 +230,7 @@ def __init__(
         volumes: Optional[List[Union[Volume, CloudBucket]]] = [],
         secrets: Optional[List[str]] = None,
         autoscaler: Autoscaler = QueueDepthAutoscaler(),
+        checkpoint_enabled: bool = False,
         vllm_args: VLLMArgs = VLLMArgs(),
     ):
         if vllm_args.download_dir == DEFAULT_VLLM_CACHE_DIR:
@@ -261,6 +264,7 @@ def __init__(
             volumes=volumes,
             secrets=secrets,
             autoscaler=autoscaler,
+            checkpoint_enabled=checkpoint_enabled,
         )
 
         self.chat_template_url = vllm_args.chat_template_url