Skip to content

Commit edd10f2

Browse files
committed
arch: get shm max size on device
1 parent 55f84b2 commit edd10f2

File tree

1 file changed

+28
-0
lines changed

1 file changed

+28
-0
lines changed

devito/arch/archinfo.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,6 +1069,32 @@ def march(self):
10691069
return 'tesla'
10701070
return None
10711071

1072+
@cached_property
def max_shm_per_block(self):
    """
    Get the maximum amount of shared memory per thread block, in bytes.

    The value is obtained from the CUDA runtime (libcudart) by querying the
    ``cudaDevAttrMaxSharedMemoryPerBlockOptin`` attribute of the device that
    is current for the calling thread.

    Raises
    ------
    RuntimeError
        If the cudart library cannot be located, or if one of the CUDA
        runtime calls reports a failure.
    """
    # `import ctypes` alone does not guarantee that the `ctypes.util`
    # submodule has been loaded, so make sure it is available here
    import ctypes.util

    # Load libcudart
    libname = ctypes.util.find_library("cudart")
    if not libname:
        raise RuntimeError("cudart library not found")
    lib = ctypes.CDLL(libname)

    def check(status, call):
        # CUDA runtime calls return a `cudaError_t`; 0 means `cudaSuccess`.
        # Without this check a failed query would silently yield (and
        # cache) the zero-initialised output value
        if status != 0:
            raise RuntimeError(
                f"{call} failed with CUDA error code {status}"
            )

    # Numeric value of the `cudaDeviceAttr` enumerator in the CUDA runtime
    cudaDevAttrMaxSharedMemoryPerBlockOptin = 97

    # Get current device
    dev = ctypes.c_int()
    check(lib.cudaGetDevice(ctypes.byref(dev)), "cudaGetDevice")

    # Query attribute
    value = ctypes.c_int()
    status = lib.cudaDeviceGetAttribute(
        ctypes.byref(value),
        ctypes.c_int(cudaDevAttrMaxSharedMemoryPerBlockOptin),
        dev
    )
    check(status, "cudaDeviceGetAttribute")

    return value.value
1097+
10721098
def supports(self, query, language=None):
10731099
if language != 'cuda':
10741100
return False
@@ -1125,6 +1151,8 @@ class AmdDevice(Device):
11251151

11261152
max_mem_trans_nbytes = 256
11271153

1154+
max_shm_per_block = 64*1024 # 64 KB
1155+
11281156
@cached_property
11291157
def march(cls):
11301158
# TODO: this corresponds to Vega, which acts as the fallback `march`

0 commit comments

Comments
 (0)