Tested on a 5060: the version of libtorch we have appears not to support compute capability 12 (sm_120):
https://discuss.pytorch.org/t/pytorch-support-for-sm-120-nvidia-geforce-rtx-5060/220941
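As a quick cross-check (not part of the original report), recent NVIDIA drivers can report each device's compute capability directly; a 5060 should show 12.0, while an Ampere A1000 shows 8.6:

```bash
# Print each GPU's name and compute capability (requires a recent driver).
nvidia-smi --query-gpu=name,compute_cap --format=csv,noheader
```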
Below is a Python-free test script (authored by ChatGPT) that exercises libtorch directly. It works on an A1000 6 GB laptop GPU but fails on the 5060:
```bash
#!/usr/bin/env bash
set -euo pipefail
# Point this to your CUDA-enabled LibTorch folder
: "${LIBTORCH:=/usr/local}"
cat > torchscript_cuda_sanity.cpp <<'CPP'
#include <torch/torch.h>
#include <torch/script.h>
#include <iostream>
#include <vector>
static void check(bool cond, const char* msg) {
    if (!cond) {
        std::cerr << "ERROR: " << msg << "\n";
        std::exit(1);
    }
}

int main() {
    std::cout << "=== LibTorch TorchScript CUDA sanity ===\n";
    std::cout << "Torch version: " << TORCH_VERSION << "\n";

    bool cuda_available = torch::cuda::is_available();
    std::cout << "CUDA available: " << (cuda_available ? "true" : "false") << "\n";
    if (cuda_available) {
        std::cout << "CUDA device count: " << torch::cuda::device_count() << "\n";
    }

    // TorchScript code: a Module with forward(a, b) = a + b
    const std::string ts_src = R"TS(
def forward(self, a: Tensor, b: Tensor) -> Tensor:
    return a + b
)TS";
    torch::jit::Module m("AddModule");
    m.define(ts_src);

    // CPU test (double tensors)
    auto a_cpu = torch::tensor({1.5, 2.25, -3.0}, torch::dtype(torch::kFloat64).device(torch::kCPU));
    auto b_cpu = torch::tensor({3.0, 4.75, 7.0}, torch::dtype(torch::kFloat64).device(torch::kCPU));
    auto out_cpu_iv = m.forward({a_cpu, b_cpu});
    auto out_cpu = out_cpu_iv.toTensor();
    std::cout << "\nCPU output: " << out_cpu << "\n";

    // Basic correctness check on CPU
    auto expected_cpu = a_cpu + b_cpu;
    check(out_cpu.equal(expected_cpu), "CPU TorchScript result mismatch");

    if (!cuda_available) {
        std::cout << "\nCUDA not available in this build/runtime. CPU sanity PASSED.\n";
        return 0;
    }

    // CUDA test (double tensors)
    auto a_gpu = a_cpu.to(torch::kCUDA);
    auto b_gpu = b_cpu.to(torch::kCUDA);
    auto out_gpu_iv = m.forward({a_gpu, b_gpu});
    auto out_gpu = out_gpu_iv.toTensor();
    // Force sync to surface kernel/runtime problems immediately
    torch::cuda::synchronize();
    std::cout << "GPU output: " << out_gpu.cpu() << "\n";

    // Compare CPU vs GPU
    check(out_gpu.cpu().equal(out_cpu), "GPU TorchScript output != CPU output");

    // Extra CUDA sanity: a small matmul to confirm kernels run fine
    auto x = torch::randn({512, 512}, torch::dtype(torch::kFloat32).device(torch::kCUDA));
    auto y = torch::randn({512, 512}, torch::dtype(torch::kFloat32).device(torch::kCUDA));
    auto z = x.matmul(y);
    torch::cuda::synchronize();
    std::cout << "\nExtra CUDA op OK. z.mean() = " << z.mean().item<double>() << "\n";

    std::cout << "\nPASS: TorchScript + CUDA sanity check succeeded.\n";
    return 0;
}
CPP
# Build (Linux/macOS). Requires CUDA-enabled LibTorch.
# Notes:
# - If you get undefined references, your LIBTORCH is likely CPU-only or mismatched.
# - rpath keeps runtime from needing LD_LIBRARY_PATH.
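# - -Wl,--no-as-needed around the CUDA libs is deliberate: nothing in this
#   file references torch_cuda/c10_cuda symbols directly (their kernels are
#   registered via static initializers), so an as-needed link would drop
#   them and CUDA would report as unavailable at runtime.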
c++ -O2 -std=c++17 torchscript_cuda_sanity.cpp -o torchscript_cuda_sanity \
-I"$LIBTORCH/include" \
-I"$LIBTORCH/include/torch/csrc/api/include" \
-L"$LIBTORCH/lib" \
-Wl,--no-as-needed \
-ltorch_cuda \
-lc10_cuda \
-Wl,--as-needed \
-ltorch \
-ltorch_cpu \
-lc10 \
-Wl,-rpath,"$LIBTORCH/lib"
./torchscript_cuda_sanity
```

To invoke, save the script as `torchscript_cuda_sanity.sh` and run:

```bash
chmod +x torchscript_cuda_sanity.sh
LIBTORCH=/usr/local ./torchscript_cuda_sanity.sh
```
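To see which GPU architectures a given libtorch binary was actually built for, you can inspect `libtorch_cuda.so` with `cuobjdump` (a sketch, assuming the library sits under `$LIBTORCH/lib`; not part of the original script). If the output shows no `sm_120` SASS and no PTX the driver can JIT-compile forward, the failure on the 5060 is expected:

```bash
# List SASS architectures embedded in libtorch's CUDA library...
cuobjdump --list-elf "$LIBTORCH/lib/libtorch_cuda.so" | grep -o 'sm_[0-9]*' | sort -u
# ...and any embedded PTX, which the driver can JIT for newer GPUs.
cuobjdump --list-ptx "$LIBTORCH/lib/libtorch_cuda.so" | grep -o 'compute_[0-9]*' | sort -u
```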