diff --git a/sw/nic/gpuagent/api/include/aga_gpu.hpp b/sw/nic/gpuagent/api/include/aga_gpu.hpp index 917217e..031f37d 100644 --- a/sw/nic/gpuagent/api/include/aga_gpu.hpp +++ b/sw/nic/gpuagent/api/include/aga_gpu.hpp @@ -404,6 +404,8 @@ typedef struct aga_gpu_vram_status_s { char vendor[AGA_MAX_STR_LEN + 1]; /// VRAM size (in MB) uint64_t size; + /// VRAM max bandwidth at max memory clock (GB/s) + uint64_t max_bandwidth; } aga_gpu_vram_status_t; /// \brief GPU page status diff --git a/sw/nic/gpuagent/api/smi/amdsmi/smi_api.cc b/sw/nic/gpuagent/api/smi/amdsmi/smi_api.cc index 3b987dd..366e8ef 100644 --- a/sw/nic/gpuagent/api/smi/amdsmi/smi_api.cc +++ b/sw/nic/gpuagent/api/smi/amdsmi/smi_api.cc @@ -763,6 +763,7 @@ smi_gpu_fill_status (aga_gpu_handle_t gpu_handle, uint32_t gpu_id, } status->xgmi_status.width = metrics_info.xgmi_link_width; status->xgmi_status.speed = metrics_info.xgmi_link_speed; + status->vram_status.max_bandwidth = metrics_info.vram_max_bandwidth; } else { AGA_TRACE_ERR("Failed to get GPU metrics info for GPU {}, err {}", gpu_handle, amdsmi_ret); diff --git a/sw/nic/gpuagent/api/smi/smi_api_mock.cc b/sw/nic/gpuagent/api/smi/smi_api_mock.cc index 2dc28e8..e14d466 100644 --- a/sw/nic/gpuagent/api/smi/smi_api_mock.cc +++ b/sw/nic/gpuagent/api/smi/smi_api_mock.cc @@ -245,6 +245,8 @@ smi_gpu_fill_status (aga_gpu_handle_t gpu_handle, uint32_t gpu_id, status->vram_status.type = AGA_VRAM_TYPE_HBM; strcpy(status->vram_status.vendor, "hynix"); status->vram_status.size = 196592; + // fill VRAM max bandwidth mock value + status->vram_status.max_bandwidth = 3276800; // fill the xgmi error count status->xgmi_status.error_status = AGA_GPU_XGMI_STATUS_NO_ERROR; // fill total memory diff --git a/sw/nic/gpuagent/cli/cmd/gpu.go b/sw/nic/gpuagent/cli/cmd/gpu.go index 4ef2576..7242fcc 100644 --- a/sw/nic/gpuagent/cli/cmd/gpu.go +++ b/sw/nic/gpuagent/cli/cmd/gpu.go @@ -1042,6 +1042,11 @@ func printGPUStatus(gpu *aga.GPU, statusOnly bool) { fmt.Printf(indent+" %-36s : %v\n", "VRAM size (in MB)", vram.GetSize_()) } + if vram.GetMaxBandwidth() != 0 { + printVRAMStatusHdr(indent) + fmt.Printf(indent+" %-36s : %d\n", "VRAM max bandwidth (in GB/s)", + vram.GetMaxBandwidth()) + } } if statusOnly { fmt.Printf("\n%s\n", strings.Repeat("-", 90)) diff --git a/sw/nic/gpuagent/protos/gpu.proto b/sw/nic/gpuagent/protos/gpu.proto index e9d83eb..94043a9 100644 --- a/sw/nic/gpuagent/protos/gpu.proto +++ b/sw/nic/gpuagent/protos/gpu.proto @@ -322,11 +322,13 @@ enum GPUThrottlingStatus { message GPUVRAMStatus { // VRAM memory type - VRAMType Type = 1; + VRAMType Type = 1; // VRAM vendor - string Vendor = 2; + string Vendor = 2; // VRAM size (in MB) - uint64 Size = 3; + uint64 Size = 3; + // VRAM max bandwidth at max memory clock (GB/s) + uint64 MaxBandwidth = 4; } // GPU page status diff --git a/sw/nic/gpuagent/svc/gpu_to_proto.hpp b/sw/nic/gpuagent/svc/gpu_to_proto.hpp index 034b057..b17c484 100644 --- a/sw/nic/gpuagent/svc/gpu_to_proto.hpp +++ b/sw/nic/gpuagent/svc/gpu_to_proto.hpp @@ -366,6 +366,7 @@ aga_gpu_vram_status_to_proto (GPUVRAMStatus *proto_status, proto_status->set_type(aga_gpu_vram_type_to_proto(status->type)); proto_status->set_vendor(status->vendor); proto_status->set_size(status->size); + proto_status->set_maxbandwidth(status->max_bandwidth); } static inline amdgpu::GPUPageStatus