Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 18 additions & 7 deletions src/sagemaker/hyperpod/cluster_management/hp_cluster_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,12 +390,30 @@ def list(region: Optional[str] = None, stack_status_filter: Optional[List[str]]
"""
cf = create_boto3_client('cloudformation', region_name=region)

# All valid stack statuses except DELETE_COMPLETE, used to avoid paginating
# through tens of thousands of deleted stacks which causes throttling.
_ACTIVE_STACK_STATUSES = [
'CREATE_IN_PROGRESS', 'CREATE_FAILED', 'CREATE_COMPLETE',
'ROLLBACK_IN_PROGRESS', 'ROLLBACK_FAILED', 'ROLLBACK_COMPLETE',
'DELETE_IN_PROGRESS', 'DELETE_FAILED',
'UPDATE_IN_PROGRESS', 'UPDATE_COMPLETE_CLEANUP_IN_PROGRESS',
'UPDATE_COMPLETE', 'UPDATE_FAILED',
'UPDATE_ROLLBACK_IN_PROGRESS', 'UPDATE_ROLLBACK_FAILED',
'UPDATE_ROLLBACK_COMPLETE_CLEANUP_IN_PROGRESS', 'UPDATE_ROLLBACK_COMPLETE',
'REVIEW_IN_PROGRESS', 'IMPORT_IN_PROGRESS', 'IMPORT_COMPLETE',
'IMPORT_ROLLBACK_IN_PROGRESS', 'IMPORT_ROLLBACK_FAILED', 'IMPORT_ROLLBACK_COMPLETE',
]

try:
# Prepare API call parameters
list_params = {}

if stack_status_filter is not None:
list_params['StackStatusFilter'] = stack_status_filter
else:
# Exclude DELETE_COMPLETE at the API level to avoid paginating through
# large numbers of deleted stacks, which causes throttling errors.
list_params['StackStatusFilter'] = _ACTIVE_STACK_STATUSES

response = cf.list_stacks(**list_params)

Expand All @@ -406,13 +424,6 @@ def list(region: Optional[str] = None, stack_status_filter: Optional[List[str]]
response = cf.list_stacks(**list_params)
all_summaries.extend(response.get('StackSummaries', []))

# Only filter DELETE_COMPLETE when no explicit filter is provided
if stack_status_filter is None:
all_summaries = [
stack for stack in all_summaries
if stack.get('StackStatus') != 'DELETE_COMPLETE'
]

return {'StackSummaries': all_summaries}
except cf.exceptions.ClientError as e:
error_code = e.response['Error']['Code']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,17 @@ def stack_name(self):
@pytest.mark.dependency(name="list_stacks")
def test_list_stacks(self):
"""Test listing CloudFormation stacks using HpClusterStack.list."""
# Test listing stacks - should return a response with StackSummaries
response = HpClusterStack.list()

# Verify response structure

assert isinstance(response, dict)
assert 'StackSummaries' in response
assert isinstance(response['StackSummaries'], list)

# If there are stacks, verify they have expected fields

# Verify DELETE_COMPLETE stacks are excluded at the API level
for stack in response['StackSummaries']:
assert stack.get('StackStatus') != 'DELETE_COMPLETE', \
f"DELETE_COMPLETE stack found in results: {stack['StackName']}"

if response['StackSummaries']:
stack = response['StackSummaries'][0]
assert 'StackName' in stack
Expand Down
54 changes: 28 additions & 26 deletions test/unit_tests/cluster_management/test_hp_cluster_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,31 @@ def test_describe_access_denied(self, mock_boto3_client, mock_boto3_session):
def test_list_success(self, mock_create_client):
mock_cf_client = MagicMock()
mock_create_client.return_value = mock_cf_client
mock_response = {'StackSummaries': [{'StackName': 'stack1'}, {'StackName': 'stack2'}]}

mock_response = {'StackSummaries': [{'StackName': 'stack1', 'StackStatus': 'CREATE_COMPLETE'}]}
mock_cf_client.list_stacks.return_value = mock_response

result = HpClusterStack.list()

mock_create_client.assert_called_once_with('cloudformation', region_name=None)
mock_cf_client.list_stacks.assert_called_once()
# Verify DELETE_COMPLETE is excluded at the API level
call_kwargs = mock_cf_client.list_stacks.call_args[1]
self.assertIn('StackStatusFilter', call_kwargs)
self.assertNotIn('DELETE_COMPLETE', call_kwargs['StackStatusFilter'])
self.assertEqual(result, mock_response)

@patch('sagemaker.hyperpod.cluster_management.hp_cluster_stack.create_boto3_client')
def test_list_with_explicit_status_filter(self, mock_create_client):
"""Explicit StackStatusFilter is passed through as-is."""
mock_cf_client = MagicMock()
mock_create_client.return_value = mock_cf_client
mock_cf_client.list_stacks.return_value = {'StackSummaries': []}

HpClusterStack.list(stack_status_filter=['CREATE_COMPLETE'])

call_kwargs = mock_cf_client.list_stacks.call_args[1]
self.assertEqual(call_kwargs['StackStatusFilter'], ['CREATE_COMPLETE'])

@patch('boto3.session.Session')
@patch('boto3.client')
def test_list_access_denied(self, mock_boto3_client, mock_boto3_session):
Expand Down Expand Up @@ -479,29 +494,16 @@ class TestHpClusterStackList(unittest.TestCase):

@patch('sagemaker.hyperpod.cluster_management.hp_cluster_stack.create_boto3_client')
def test_list_default_filters_delete_complete(self, mock_create_client):
"""Test that list() filters out DELETE_COMPLETE stacks by default."""
# Arrange
"""Test that list() excludes DELETE_COMPLETE at the API level via StackStatusFilter."""
mock_cf_client = MagicMock()
mock_create_client.return_value = mock_cf_client

mock_response = {
'StackSummaries': [
{'StackName': 'active-stack', 'StackStatus': 'CREATE_COMPLETE'},
{'StackName': 'deleted-stack', 'StackStatus': 'DELETE_COMPLETE'},
{'StackName': 'updating-stack', 'StackStatus': 'UPDATE_IN_PROGRESS'}
]
}
mock_cf_client.list_stacks.return_value = mock_response

# Act
result = HpClusterStack.list()

# Assert
assert len(result['StackSummaries']) == 2
stack_names = [stack['StackName'] for stack in result['StackSummaries']]
assert 'active-stack' in stack_names
assert 'updating-stack' in stack_names
assert 'deleted-stack' not in stack_names
mock_cf_client.list_stacks.return_value = {'StackSummaries': []}

HpClusterStack.list()

call_kwargs = mock_cf_client.list_stacks.call_args[1]
self.assertIn('StackStatusFilter', call_kwargs)
self.assertNotIn('DELETE_COMPLETE', call_kwargs['StackStatusFilter'])

@patch('sagemaker.hyperpod.cluster_management.hp_cluster_stack.create_boto3_client')
def test_list_paginates_through_all_pages(self, mock_create_client):
Expand Down
Loading