diff --git a/.release-please-manifest.json b/.release-please-manifest.json index cff01f26..b2585653 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "2.15.0" + ".": "2.16.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index e8f2d826..81927965 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 81 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-ce108a2095d36552bb556506de04475674f512a13bc5aa099e9750993405be14.yml -openapi_spec_hash: 4763dd426dd805306bbb38a314158cd3 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-891d6a19c1efb39d010d53a8965fd314acf279fcb09f7a6622df5fc6ce779bec.yml +openapi_spec_hash: 4554c2095bf588ac4a195d038f3893c1 config_hash: b35d5968fb07cce1c1be735f874898b1 diff --git a/CHANGELOG.md b/CHANGELOG.md index c6d0c9f7..665a4dc1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # Changelog +## 2.16.0 (2026-05-22) + +Full Changelog: [v2.15.0...v2.16.0](https://github.com/togethercomputer/together-py/compare/v2.15.0...v2.16.0) + +### Features + +* **cli:** expose new cluster SDK parameters ([#378](https://github.com/togethercomputer/together-py/issues/378)) ([b694d8f](https://github.com/togethercomputer/together-py/commit/b694d8f261da15835257bb0352e1ba312b4127d3)) + + +### Documentation + +* **api:** add size and duration limits to file parameter in audio transcriptions/translations ([cce54f2](https://github.com/togethercomputer/together-py/commit/cce54f2362fd8dba1205eccf89f6909be74e2c37)) + ## 2.15.0 (2026-05-20) Full Changelog: [v2.14.0...v2.15.0](https://github.com/togethercomputer/together-py/compare/v2.14.0...v2.15.0) diff --git a/pyproject.toml b/pyproject.toml index 5bf28644..270b1dc4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "together" -version = "2.15.0" +version = "2.16.0" description = "The official Python library for the together API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/together/_version.py b/src/together/_version.py index 6656537c..63216a8c 100644 --- a/src/together/_version.py +++ b/src/together/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "together" -__version__ = "2.15.0" # x-release-please-version +__version__ = "2.16.0" # x-release-please-version diff --git a/src/together/lib/cli/api/beta/clusters/create.py b/src/together/lib/cli/api/beta/clusters/create.py index 83fca202..dbfccb35 100644 --- a/src/together/lib/cli/api/beta/clusters/create.py +++ b/src/together/lib/cli/api/beta/clusters/create.py @@ -14,7 +14,8 @@ NumGpusParameter = Annotated[Optional[int], Parameter(help="Number of GPUs to allocate in the cluster")] RegionParameter = Annotated[Optional[str], Parameter(help="Region to create the cluster in")] BillingTypeParameter = Annotated[ - Optional[Literal["RESERVED", "ON_DEMAND"]], Parameter(help="Billing type to use for the cluster") + Optional[Literal["RESERVED", "ON_DEMAND", "SCHEDULED_CAPACITY"]], + Parameter(help="Billing type to use for the cluster"), ] NvidiaDriverVersionParameter = Annotated[Optional[str], Parameter(help="Nvidia driver version to use for the cluster")] CudaVersionParameter = Annotated[Optional[str], Parameter(help="CUDA version to use for the cluster")] @@ -27,6 +28,25 @@ ] ClusterTypeParameter = Annotated[Optional[Literal["KUBERNETES", "SLURM"]], Parameter(help="Cluster type")] VolumeParameter = Annotated[Optional[str], Parameter(help="Storage volume ID to use for the cluster")] +AutoScaleParameter = Annotated[Optional[bool], Parameter(help="Enable cluster auto-scaling")] +AutoScaleMaxGpusParameter = Annotated[Optional[int], Parameter(help="Maximum GPUs for auto-scaling")] +CapacityPoolIDParameter = Annotated[Optional[str], Parameter(help="Capacity pool ID to use for the cluster")] +GpuNodeFailoverEnabledParameter = Annotated[ + Optional[bool], Parameter(help="Enable automated GPU node failover for the cluster") +] +InstallTraefikParameter = Annotated[Optional[bool], Parameter(help="Install Traefik ingress controller")] +NumCapacityPoolGpusParameter = Annotated[ + Optional[int], Parameter(help="Number of GPUs to allocate from a capacity pool") +] +NumPreemptibleGpusParameter = Annotated[Optional[int], Parameter(help="Number of preemptible GPUs to request")] +NumReservedGpusParameter = Annotated[Optional[int], Parameter(help="Number of prepaid reserved GPUs to request")] +ProjectIDParameter = Annotated[Optional[str], Parameter(help="Project ID for the cluster")] +ReservationEndTimeParameter = Annotated[Optional[str], Parameter(help="Reservation end time for scheduled capacity")] +ReservationStartTimeParameter = Annotated[ + Optional[str], Parameter(help="Reservation start time for scheduled capacity") +] +SlurmImageParameter = Annotated[Optional[str], Parameter(help="Custom Slurm image for Slurm clusters")] +SlurmShmSizeGibParameter = Annotated[Optional[int], Parameter(help="Shared memory size in GiB for Slurm clusters")] async def create( @@ -40,6 +60,19 @@ async def create( gpu_type: GpuTypeParameter = None, cluster_type: ClusterTypeParameter = None, volume: VolumeParameter = None, + auto_scale: AutoScaleParameter = None, + auto_scale_max_gpus: AutoScaleMaxGpusParameter = None, + capacity_pool_id: CapacityPoolIDParameter = None, + gpu_node_failover_enabled: GpuNodeFailoverEnabledParameter = None, + install_traefik: InstallTraefikParameter = None, + num_capacity_pool_gpus: NumCapacityPoolGpusParameter = None, + num_preemptible_gpus: NumPreemptibleGpusParameter = None, + num_reserved_gpus: NumReservedGpusParameter = None, + project_id: ProjectIDParameter = None, + reservation_end_time: ReservationEndTimeParameter = None, + reservation_start_time: ReservationStartTimeParameter = None, + slurm_image: SlurmImageParameter = None, + slurm_shm_size_gib: SlurmShmSizeGibParameter = None, *, config: CLIConfigParameter, ) -> None: @@ -57,6 +90,32 @@ async def create( ) if volume: params["volume_id"] = volume + if auto_scale is not None: + params["auto_scale"] = auto_scale + if auto_scale_max_gpus is not None: + params["auto_scale_max_gpus"] = auto_scale_max_gpus + if capacity_pool_id: + params["capacity_pool_id"] = capacity_pool_id + if gpu_node_failover_enabled is not None: + params["gpu_node_failover_enabled"] = gpu_node_failover_enabled + if install_traefik is not None: + params["install_traefik"] = install_traefik + if num_capacity_pool_gpus is not None: + params["num_capacity_pool_gpus"] = num_capacity_pool_gpus + if num_preemptible_gpus is not None: + params["num_preemptible_gpus"] = num_preemptible_gpus + if num_reserved_gpus is not None: + params["num_reserved_gpus"] = num_reserved_gpus + if project_id: + params["project_id"] = project_id + if reservation_end_time: + params["reservation_end_time"] = reservation_end_time + if reservation_start_time: + params["reservation_start_time"] = reservation_start_time + if slurm_image: + params["slurm_image"] = slurm_image + if slurm_shm_size_gib is not None: + params["slurm_shm_size_gib"] = slurm_shm_size_gib # JSON Mode skips hand holding through the argument setup if not config.json and not config.non_interactive: diff --git a/src/together/lib/cli/api/beta/clusters/storage/create.py b/src/together/lib/cli/api/beta/clusters/storage/create.py index fa3cc408..53d7ced8 100644 --- a/src/together/lib/cli/api/beta/clusters/storage/create.py +++ b/src/together/lib/cli/api/beta/clusters/storage/create.py @@ -1,9 +1,10 @@ from __future__ import annotations -from typing import Annotated +from typing import Optional, Annotated from cyclopts import Parameter +from together import omit from together._utils._json import openapi_dumps from together.lib.cli.utils.config import CLIConfigParameter from together.lib.cli.utils._console import console @@ -13,6 +14,10 @@ async def create( region: Annotated[str, Parameter(help="Region to create the storage volume in")], size_tib: Annotated[int, Parameter(help="Size of the storage volume in TiB")], volume_name: Annotated[str, Parameter(help="Name of the storage volume")], + is_lifecycle_independent: Annotated[ + Optional[bool], + Parameter(help="Keep the storage volume after cluster decommissioning"), + ] = None, *, config: CLIConfigParameter, ) -> None: @@ -21,6 +26,7 @@ async def create( region=region, size_tib=size_tib, volume_name=volume_name, + is_lifecycle_independent=is_lifecycle_independent if is_lifecycle_independent is not None else omit, ) if config.json: diff --git a/src/together/lib/cli/api/beta/clusters/storage/update.py b/src/together/lib/cli/api/beta/clusters/storage/update.py index 59e00da1..a5d1ffbd 100644 --- a/src/together/lib/cli/api/beta/clusters/storage/update.py +++ b/src/together/lib/cli/api/beta/clusters/storage/update.py @@ -1,9 +1,10 @@ from __future__ import annotations -from typing import Annotated +from typing import Optional, Annotated from cyclopts import Parameter +from together import omit from together._utils._json import openapi_dumps from together.lib.cli.utils.config import CLIConfigParameter from together.lib.cli.utils._console import console @@ -11,14 +12,14 @@ async def update( volume_id: str, - size_tib: Annotated[int, Parameter(help="New size of the storage volume in TiB")], + size_tib: Annotated[Optional[int], Parameter(help="New size of the storage volume in TiB")] = None, *, config: CLIConfigParameter, ) -> None: """Update a storage volume (resize).""" response = await config.client.beta.clusters.storage.update( volume_id=volume_id, - size_tib=size_tib, + size_tib=size_tib if size_tib is not None else omit, ) if config.json: diff --git a/src/together/lib/cli/api/beta/clusters/update.py b/src/together/lib/cli/api/beta/clusters/update.py index 03612c35..739a228c 100644 --- a/src/together/lib/cli/api/beta/clusters/update.py +++ b/src/together/lib/cli/api/beta/clusters/update.py @@ -18,6 +18,18 @@ async def update( cluster_type: Annotated[ Optional[Literal["KUBERNETES", "SLURM"]], Parameter(help="Type of cluster to update") ] = None, + num_preemptible_gpus: Annotated[ + Optional[int], + Parameter(help="Desired number of preemptible GPUs for the cluster"), + ] = None, + num_reserved_gpus: Annotated[ + Optional[int], + Parameter(help="Desired number of reserved GPUs for the cluster"), + ] = None, + reservation_end_time: Annotated[ + Optional[str], + Parameter(help="Timestamp at which the cluster should be decommissioned"), + ] = None, *, config: CLIConfigParameter, ) -> None: @@ -29,6 +41,9 @@ async def update( cluster_id, num_gpus=num_gpus if num_gpus is not None else omit, cluster_type=cluster_type if cluster_type is not None else omit, + num_preemptible_gpus=num_preemptible_gpus if num_preemptible_gpus is not None else omit, + num_reserved_gpus=num_reserved_gpus if num_reserved_gpus is not None else omit, + reservation_end_time=reservation_end_time or omit, ), ) diff --git a/src/together/resources/audio/transcriptions.py b/src/together/resources/audio/transcriptions.py index a2f1980a..4127d471 100644 --- a/src/together/resources/audio/transcriptions.py +++ b/src/together/resources/audio/transcriptions.py @@ -69,8 +69,10 @@ def create( Transcribes audio into text Args: - file: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, - .webm, .flac, .ogg, .opus, .aac. + file: Audio file upload or public HTTP/HTTPS URL. Supported formats: .wav, .mp3, .m4a, + .webm, .flac, .ogg, .opus, .aac. Maximum duration 4 hours; longer audio is + rejected with `audio_too_long`. Binary uploads are additionally capped at 500 MB + (HTTP 413); URL-fetched audio is capped at 1 GB. diarize: Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription. In the response, in the words array, you @@ -195,8 +197,10 @@ async def create( Transcribes audio into text Args: - file: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, - .webm, .flac, .ogg, .opus, .aac. + file: Audio file upload or public HTTP/HTTPS URL. Supported formats: .wav, .mp3, .m4a, + .webm, .flac, .ogg, .opus, .aac. Maximum duration 4 hours; longer audio is + rejected with `audio_too_long`. Binary uploads are additionally capped at 500 MB + (HTTP 413); URL-fetched audio is capped at 1 GB. diarize: Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription. In the response, in the words array, you diff --git a/src/together/resources/audio/translations.py b/src/together/resources/audio/translations.py index 326b5de4..ba318cf4 100644 --- a/src/together/resources/audio/translations.py +++ b/src/together/resources/audio/translations.py @@ -66,8 +66,10 @@ def create( Translates audio into English Args: - file: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, - .webm, .flac, .ogg, .opus, .aac. + file: Audio file upload or public HTTP/HTTPS URL. Supported formats: .wav, .mp3, .m4a, + .webm, .flac, .ogg, .opus, .aac. Maximum duration 4 hours; longer audio is + rejected with `audio_too_long`. Binary uploads are additionally capped at 500 MB + (HTTP 413); URL-fetched audio is capped at 1 GB. language: Target output language. Optional ISO 639-1 language code. If omitted, language is set to English. @@ -169,8 +171,10 @@ async def create( Translates audio into English Args: - file: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, - .webm, .flac, .ogg, .opus, .aac. + file: Audio file upload or public HTTP/HTTPS URL. Supported formats: .wav, .mp3, .m4a, + .webm, .flac, .ogg, .opus, .aac. Maximum duration 4 hours; longer audio is + rejected with `audio_too_long`. Binary uploads are additionally capped at 500 MB + (HTTP 413); URL-fetched audio is capped at 1 GB. language: Target output language. Optional ISO 639-1 language code. If omitted, language is set to English. diff --git a/src/together/types/audio/transcription_create_params.py b/src/together/types/audio/transcription_create_params.py index 82522400..e880c818 100644 --- a/src/together/types/audio/transcription_create_params.py +++ b/src/together/types/audio/transcription_create_params.py @@ -14,7 +14,10 @@ class TranscriptionCreateParams(TypedDict, total=False): file: Required[Union[FileTypes, str]] """Audio file upload or public HTTP/HTTPS URL. - Supported formats .wav, .mp3, .m4a, .webm, .flac, .ogg, .opus, .aac. + Supported formats: .wav, .mp3, .m4a, .webm, .flac, .ogg, .opus, .aac. Maximum + duration 4 hours; longer audio is rejected with `audio_too_long`. Binary uploads + are additionally capped at 500 MB (HTTP 413); URL-fetched audio is capped at 1 + GB. """ diarize: bool diff --git a/src/together/types/audio/translation_create_params.py b/src/together/types/audio/translation_create_params.py index 21896fdf..3c843ed2 100644 --- a/src/together/types/audio/translation_create_params.py +++ b/src/together/types/audio/translation_create_params.py @@ -14,7 +14,10 @@ class TranslationCreateParams(TypedDict, total=False): file: Required[Union[FileTypes, str]] """Audio file upload or public HTTP/HTTPS URL. - Supported formats .wav, .mp3, .m4a, .webm, .flac, .ogg, .opus, .aac. + Supported formats: .wav, .mp3, .m4a, .webm, .flac, .ogg, .opus, .aac. Maximum + duration 4 hours; longer audio is rejected with `audio_too_long`. Binary uploads + are additionally capped at 500 MB (HTTP 413); URL-fetched audio is capped at 1 + GB. """ language: str diff --git a/tests/cli/test_beta_clusters.py b/tests/cli/test_beta_clusters.py index ead92334..7778bc86 100644 --- a/tests/cli/test_beta_clusters.py +++ b/tests/cli/test_beta_clusters.py @@ -158,6 +158,75 @@ def test_create_non_interactive_posts_expected_body(self, respx_mock: MockRouter assert body["billing_type"] == "ON_DEMAND" assert result.exit_code == 0 + @pytest.mark.respx(base_url=base_url) + def test_create_accepts_new_cluster_params(self, respx_mock: MockRouter, cli_runner: CliRunner) -> None: + created = _cluster_body("new-id", "scheduled") + route = respx_mock.post("/compute/clusters").mock(return_value=httpx.Response(200, json=created)) + result = cli_runner.invoke( + [ + "beta", + "clusters", + "create", + "--non-interactive", + "--cluster-type", + "SLURM", + "--gpu-type", + "H100_SXM", + "--nvidia-driver-version", + "565", + "--cuda-version", + "12.6", + "--region", + "us-central-8", + "--num-gpus", + "8", + "--billing-type", + "SCHEDULED_CAPACITY", + "--name", + "scheduled", + "--auto-scale", + "--auto-scale-max-gpus", + "16", + "--capacity-pool-id", + "pool-1", + "--gpu-node-failover-enabled", + "--install-traefik", + "--num-capacity-pool-gpus", + "8", + "--num-preemptible-gpus", + "8", + "--num-reserved-gpus", + "8", + "--project-id", + "proj-1", + "--reservation-start-time", + "2026-06-01T00:00:00Z", + "--reservation-end-time", + "2026-06-02T00:00:00Z", + "--slurm-image", + "slurm:latest", + "--slurm-shm-size-gib", + "32", + ], + ) + + body = json.loads(cast(Call, route.calls[0]).request.content.decode()) + assert body["billing_type"] == "SCHEDULED_CAPACITY" + assert body["auto_scale"] is True + assert body["auto_scale_max_gpus"] == 16 + assert body["capacity_pool_id"] == "pool-1" + assert body["gpu_node_failover_enabled"] is True + assert body["install_traefik"] is True + assert body["num_capacity_pool_gpus"] == 8 + assert body["num_preemptible_gpus"] == 8 + assert body["num_reserved_gpus"] == 8 + assert body["project_id"] == "proj-1" + assert body["reservation_start_time"] == "2026-06-01T00:00:00Z" + assert body["reservation_end_time"] == "2026-06-02T00:00:00Z" + assert body["slurm_image"] == "slurm:latest" + assert body["slurm_shm_size_gib"] == 32 + assert result.exit_code == 0 + class TestBetaClustersUpdate: @pytest.mark.respx(base_url=base_url) @@ -176,6 +245,31 @@ def test_update_json_triggers_put_and_second_get(self, respx_mock: MockRouter, c assert put_body["cluster_type"] == "SLURM" assert result.exit_code == 0 + @pytest.mark.respx(base_url=base_url) + def test_update_accepts_new_cluster_params(self, respx_mock: MockRouter, cli_runner: CliRunner) -> None: + updated = _cluster_body("c1", num_gpus=16) + put = respx_mock.put("/compute/clusters/c1").mock(return_value=httpx.Response(200, json=updated)) + result = cli_runner.invoke( + [ + "beta", + "clusters", + "update", + "c1", + "--num-preemptible-gpus", + "8", + "--num-reserved-gpus", + "16", + "--reservation-end-time", + "2026-06-02T00:00:00Z", + ], + ) + + put_body = json.loads(cast(Call, put.calls[0]).request.content.decode()) + assert put_body["num_preemptible_gpus"] == 8 + assert put_body["num_reserved_gpus"] == 16 + assert put_body["reservation_end_time"] == "2026-06-02T00:00:00Z" + assert result.exit_code == 0 + class TestBetaClustersDelete: @pytest.mark.respx(base_url=base_url) @@ -234,13 +328,29 @@ def test_storage_create_json(self, respx_mock: MockRouter, cli_runner: CliRunner "1", "--volume-name", "test-volume", + "--is-lifecycle-independent", "--json", ], ) out = json.loads(result.output) assert out["volume_id"] == "vol-1" raw = cast(Call, route.calls[0]).request.content.decode() - assert json.loads(raw) == {"region": "us-east-1", "size_tib": 1, "volume_name": "test-volume"} + assert json.loads(raw) == { + "region": "us-east-1", + "size_tib": 1, + "volume_name": "test-volume", + "is_lifecycle_independent": True, + } + assert result.exit_code == 0 + + @pytest.mark.respx(base_url=base_url) + def test_storage_update_allows_omitting_size(self, respx_mock: MockRouter, cli_runner: CliRunner) -> None: + route = respx_mock.put("/compute/clusters/storage/volumes").mock( + return_value=httpx.Response(200, json=_VOLUME_BODY) + ) + result = cli_runner.invoke(["beta", "clusters", "storage", "update", "vol-1", "--json"]) + + assert json.loads(cast(Call, route.calls[0]).request.content.decode()) == {"volume_id": "vol-1"} assert result.exit_code == 0 @pytest.mark.respx(base_url=base_url)