Sample output on a machine with a single NVIDIA T1000:

```
CUDA is available; 1 GPU device detected.
Current GPU device index: 0
Current GPU device name: NVIDIA T1000
Total GPU memory: 4.00 GB
Used GPU memory: 0.00 GB
Free GPU memory: 4.00 GB
PyTorch version: 1.10.1+cu102
```
```python
import torch

# Check whether CUDA is available
cuda_available = torch.cuda.is_available()

if cuda_available:
    # Number of GPU devices
    num_gpu = torch.cuda.device_count()
    # Index of the GPU currently in use
    current_gpu_index = torch.cuda.current_device()
    # Name of the current GPU
    current_gpu_name = torch.cuda.get_device_name(current_gpu_index)
    # Total and used GPU memory
    total_memory = torch.cuda.get_device_properties(current_gpu_index).total_memory / (1024 ** 3)  # total memory (GB)
    used_memory = torch.cuda.memory_allocated(current_gpu_index) / (1024 ** 3)  # memory allocated by PyTorch (GB)
    free_memory = total_memory - used_memory  # remaining memory (GB)

    print(f"CUDA is available; {num_gpu} GPU device(s) detected.")
    print(f"Current GPU device index: {current_gpu_index}")
    print(f"Current GPU device name: {current_gpu_name}")
    print(f"Total GPU memory: {total_memory:.2f} GB")
    print(f"Used GPU memory: {used_memory:.2f} GB")
    print(f"Free GPU memory: {free_memory:.2f} GB")
else:
    print("CUDA is not available.")

# Check the PyTorch version
print(f"PyTorch version: {torch.__version__}")
```
To print just the CUDA version that PyTorch was built against:

```python
import torch

print(f"CUDA version: {torch.version.cuda}")
```
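One caveat worth knowing: `torch.cuda.memory_allocated` counts only the memory held by live PyTorch tensors. The caching allocator usually reserves more from the driver, which is why nvidia-smi reports higher usage than the numbers above. A minimal sketch contrasting the two counters (the helper name `report_memory` is my own):

```python
import torch

def report_memory(device: int = 0) -> None:
    """Print allocated vs. reserved memory for one CUDA device (illustrative helper)."""
    gib = 1024 ** 3
    # Memory occupied by live tensors created through PyTorch
    allocated = torch.cuda.memory_allocated(device) / gib
    # Memory held by PyTorch's caching allocator (always >= allocated)
    reserved = torch.cuda.memory_reserved(device) / gib
    print(f"allocated: {allocated:.4f} GiB, reserved: {reserved:.4f} GiB")

if torch.cuda.is_available():
    x = torch.randn(1024, 1024, device="cuda")  # ~4 MiB of float32
    report_memory()
```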
On Windows, the install order matters: first the NVIDIA graphics driver, then CUDA 10.2, and finally PyTorch:
```bash
pip install torch==1.10.1+cu102 torchvision==0.11.2+cu102 torchaudio==0.10.1 --extra-index-url https://download.pytorch.org/whl/cu102
```
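A quick post-install sanity check confirms that the wheels match and that a CUDA build (not a CPU-only one) was installed. A minimal sketch; the expected version strings follow from the install command above:

```python
import torch
import torchvision

# The +cu102 suffix indicates a CUDA 10.2 build rather than a CPU-only wheel
print(torch.__version__)        # expected: 1.10.1+cu102
print(torchvision.__version__)  # expected: 0.11.2+cu102
print(torch.version.cuda)       # expected: 10.2

# A CPU-only wheel would make this assert fail even with a working driver
assert torch.cuda.is_available(), "CUDA build installed but no usable GPU/driver"
```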
The following script bundles these checks into a single GPU health check, and also runs nvidia-smi for a driver-level view:

```python
import torch
import subprocess

def check_gpu_availability():
    print("==== Checking GPU Availability ====")
    if torch.cuda.is_available():
        print(f"GPU is available. Number of GPUs: {torch.cuda.device_count()}")
        for i in range(torch.cuda.device_count()):
            print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
    else:
        print("No GPU available. Please check your hardware or drivers.")
        return False
    return True

def check_cuda_and_cudnn():
    print("\n==== Checking CUDA and cuDNN Availability ====")
    if torch.cuda.is_available():
        print(f"CUDA version: {torch.version.cuda}")
        print(f"cuDNN version: {torch.backends.cudnn.version()}")
        print(f"cuDNN enabled: {torch.backends.cudnn.enabled}")
    else:
        print("CUDA or cuDNN not available. Please check the installation.")

def test_gpu_computation():
    print("\n==== Testing Simple Computation on GPU ====")
    try:
        device = torch.device("cuda")  # use the current default GPU
        a = torch.randn(50, 50, device=device)
        b = torch.randn(50, 50, device=device)
        c = torch.matmul(a, b)
        print("Matrix multiplication on GPU successful.")
    except Exception as e:
        print(f"Error during computation on GPU: {e}")

def check_gpu_memory():
    print("\n==== Checking GPU Memory Usage ====")
    try:
        gpu_memory = torch.cuda.memory_summary()
        print(gpu_memory)
    except Exception as e:
        print(f"Error retrieving GPU memory info: {e}")

def run_nvidia_smi():
    print("\n==== Running nvidia-smi Command ====")
    try:
        # Run the nvidia-smi command and capture the output
        result = subprocess.run(["nvidia-smi"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        if result.returncode == 0:
            print(result.stdout)
        else:
            print(f"Error running nvidia-smi: {result.stderr}")
    except Exception as e:
        print(f"Error running nvidia-smi command: {e}")

def main():
    print("==== Starting GPU Health Check ====")
    # Step 1: Check GPU availability
    if not check_gpu_availability():
        return
    # Step 2: Check CUDA and cuDNN versions
    check_cuda_and_cudnn()
    # Step 3: Test GPU computation
    test_gpu_computation()
    # Step 4: Check GPU memory usage
    check_gpu_memory()
    # Step 5: Run nvidia-smi to check GPU status
    run_nvidia_smi()
    print("\n==== GPU Health Check Completed ====")

if __name__ == "__main__":
    main()
```
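If every check passes, a rough timing comparison can confirm that the GPU actually outpaces the CPU on dense math. This is an illustrative sketch, not part of the health check above; the matrix size and repeat count are arbitrary:

```python
import time
import torch

def time_matmul(device: str, n: int = 2048, repeats: int = 10) -> float:
    """Average seconds per n-by-n matmul on the given device (illustrative helper)."""
    a = torch.randn(n, n, device=device)
    b = torch.randn(n, n, device=device)
    # Warm-up run so one-time kernel launch costs are not measured
    torch.matmul(a, b)
    if device == "cuda":
        torch.cuda.synchronize()  # GPU work is asynchronous; wait before timing
    start = time.perf_counter()
    for _ in range(repeats):
        torch.matmul(a, b)
    if device == "cuda":
        torch.cuda.synchronize()
    return (time.perf_counter() - start) / repeats

print(f"CPU: {time_matmul('cpu'):.4f} s per matmul")
if torch.cuda.is_available():
    print(f"GPU: {time_matmul('cuda'):.4f} s per matmul")
```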