#!/usr/bin/env python3
import torch
import torch.nn as nn
import time
import threading
from datetime import datetime
def detect_gpus():
    """Detect the CUDA GPUs visible to this process.

    Prints one summary line per device (index, name, total memory in GB).

    Returns:
        list[int]: The CUDA device indices ``0 .. device_count - 1``, or an
        empty list when CUDA is not available.
    """
    if not torch.cuda.is_available():
        print("CUDA is not available. No GPUs detected.")
        return []

    gpu_count = torch.cuda.device_count()
    print(f"Detected {gpu_count} GPU(s):")
    for i in range(gpu_count):
        gpu_name = torch.cuda.get_device_name(i)
        # total_memory is reported in bytes; convert to GB for display.
        gpu_memory = torch.cuda.get_device_properties(i).total_memory / (1024**3)
        print(f" GPU {i}: {gpu_name} ({gpu_memory:.1f} GB)")
    return list(range(gpu_count))
def gpu_worker(gpu_id, running):
    """Keep one GPU busy until the shared stop flag is cleared.

    Allocates float32 tensors totalling roughly 60% of the device's total
    memory, then loops: one training step of a small MLP on fixed random
    input, an in-place update of every allocated tensor, and a 0.1 s sleep.

    Args:
        gpu_id: CUDA device index to run on.
        running: One-element mutable sequence used as a shared flag; the
            loop ends once ``running[0]`` is falsy.

    Errors during setup are printed and end the worker. Errors inside the
    loop are printed and the iteration is retried after a 1 s pause.
    """
    device = torch.device(f'cuda:{gpu_id}')
    try:
        # Target 60% of the device's *total* memory (not the currently free amount).
        gpu_memory = torch.cuda.get_device_properties(gpu_id).total_memory
        target_memory = int(gpu_memory * 0.6)
        elements_needed = target_memory // 4  # Each float32 is 4 bytes

        # Fill the budget with many tensors of 1M-5M elements each.
        tensors = []
        remaining = elements_needed
        while remaining > 1000000:  # At least 1M elements per tensor
            size = min(remaining // 3, 5000000)  # Max 5M elements per tensor
            if size < 1000000:
                break
            # Plain buffers: they never take part in autograd, so no
            # requires_grad is needed.
            tensors.append(torch.randn(size, device=device))
            remaining -= size

        # Small MLP that provides the compute load.
        model = nn.Sequential(
            nn.Linear(1000, 2000),
            nn.ReLU(),
            nn.Linear(2000, 1000),
            nn.ReLU(),
            nn.Linear(1000, 500)
        ).to(device)
        input_data = torch.randn(64, 1000, device=device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

        print(f"GPU {gpu_id}: Started with {len(tensors)} tensors")
        while running[0]:
            try:
                # Forward pass
                optimizer.zero_grad()
                output = model(input_data)
                loss = output.sum()
                # Backward pass
                loss.backward()
                optimizer.step()

                # Touch every buffer in place so the allocation stays at the
                # target size instead of being reallocated each iteration.
                with torch.no_grad():
                    for i, tensor in enumerate(tensors):
                        if i % 2 == 0:
                            tensor.mul_(0.99).add_(torch.randn_like(tensor), alpha=0.01)
                        else:
                            tensor.sin_()
                time.sleep(0.1)  # Small delay
            except Exception as e:
                # Best effort: report and retry rather than killing the worker.
                print(f"GPU {gpu_id}: Error - {e}")
                time.sleep(1)
    except Exception as e:
        print(f"GPU {gpu_id}: Failed to initialize - {e}")
def display_status(gpus, running):
    """Print memory usage and utilization for each GPU every 5 seconds.

    Args:
        gpus: Iterable of CUDA device indices to report on.
        running: One-element mutable sequence used as a shared flag; the
            loop ends once ``running[0]`` is falsy.

    Memory figures come from ``torch.cuda.memory_allocated``. When the
    utilization query fails, the memory line is still printed with
    ``Util: N/A``.
    """
    while running[0]:
        try:
            print(f"\n[{datetime.now().strftime('%H:%M:%S')}] GPU Status:")
            print("-" * 40)
            for gpu_id in gpus:
                try:
                    memory_allocated = torch.cuda.memory_allocated(gpu_id) / (1024**3)
                    memory_total = torch.cuda.get_device_properties(gpu_id).total_memory / (1024**3)
                except Exception:
                    print(f"GPU {gpu_id}: Status unavailable")
                    continue

                # Query utilization separately so that a failure here (missing
                # attribute, missing NVML bindings, ...) does not hide the
                # memory figures obtained above.
                try:
                    utilization = f"{torch.cuda.utilization(gpu_id)}%"
                except Exception:
                    utilization = "N/A"

                print(f"GPU {gpu_id}: {memory_allocated:.1f}GB / {memory_total:.1f}GB ({(memory_allocated/memory_total)*100:.1f}%) | Util: {utilization}")
            time.sleep(5)  # Update every 5 seconds
        except KeyboardInterrupt:
            break
        except Exception as e:
            print(f"Status display error: {e}")
            time.sleep(5)
def main():
    """Run the keep-alive script: one worker thread per GPU plus a status thread.

    Returns immediately when no GPU is detected. Otherwise blocks until
    Ctrl+C is pressed or every worker thread has exited, then clears the
    shared flag, waits up to 2 s per worker and empties the CUDA cache.
    """
    print("GPU Keep-Alive Script")
    print("=" * 30)

    gpus = detect_gpus()
    if not gpus:
        return

    # Shared flag to control all threads (a list so threads see mutations).
    running = [True]

    # Start worker threads for each GPU
    threads = []
    for gpu_id in gpus:
        thread = threading.Thread(target=gpu_worker, args=(gpu_id, running), daemon=True)
        thread.start()
        threads.append(thread)

    # Start status display thread
    status_thread = threading.Thread(target=display_status, args=(gpus, running), daemon=True)
    status_thread.start()

    print(f"\nStarted {len(gpus)} GPU workers")
    print("Press Ctrl+C to stop...")
    try:
        # Poll worker liveness instead of sleeping forever, so the script
        # ends if every worker has died (e.g. failed to initialize).
        while any(thread.is_alive() for thread in threads):
            time.sleep(1)
        print("\nAll GPU workers have exited.")
    except KeyboardInterrupt:
        print("\nStopping GPU workers...")
    finally:
        running[0] = False
        # Wait for threads to finish
        for thread in threads:
            thread.join(timeout=2)
        # Clear GPU memory
        torch.cuda.empty_cache()
        print("GPU workers stopped. Memory cleared.")
# Run the keep-alive loop only when executed as a script, not on import.
if __name__ == "__main__":
    main()
关于 tokenized 数据缓存的机制,让我详细解释 `tokenized_path` 和 `cache_dir` 的区别以及它们的工作原理。
基于代码分析,这两个参数有不同的作用:
- `cache_dir`:HuggingFace Datasets 的内部缓存,即 `load_dataset()` 和数据预处理过程的中间缓存文件(调用 `.map()` 函数时的缓存)。
- `tokenized_path`:完整的 tokenized 数据集存储(通过 `save_to_disk()` 保存)。

根据代码分析,Qwen2VL 的数据预处理主要包含以下步骤:
代码位置(`src/llamafactory/data/loader.py:307-313`):

- 使用 `dataset.map()` 方法进行批处理
- `preprocessing_batch_size=1000`
- `num_proc=84`(你的配置)

在自然语言生成任务中,如何评估模型生成文本的质量是一个关键问题。BLEU 和 ROUGE 是两个最常用的自动评估指标,本文将详细介绍这两个指标的原理、计算方法和代码实现。
```bash
rsync -av --checksum LLaMA-Factory-old/ LLaMA-Factory-qwen2vl/
```
这个指令使用 `rsync` 工具将 `LLaMA-Factory-old/` 目录的内容同步到 `LLaMA-Factory-qwen2vl/` 目录,具体参数解析如下:
将 `LLaMA-Factory-old/` 下的所有文件和子目录同步到 `LLaMA-Factory-qwen2vl/`。

路径末尾的 `/`:
- 若源路径以 `/` 结尾(如 `LLaMA-Factory-old/`),则同步目录内的内容到目标路径。
- 若源路径不以 `/` 结尾(如 `LLaMA-Factory-old`),则同步目录本身到目标路径(包含目录名)。

目标目录存在性:
- 若 `LLaMA-Factory-qwen2vl/` 不存在,会自动创建。

`--checksum` 的性能影响:
- 计算校验和会增加 CPU 开销,但适合对文件一致性要求严格的场景(如防止隐藏的数据损坏)。
管理员运行命令:
```bat
reg.exe add "HKCU\Software\Classes\CLSID\{86ca1aa0-34aa-4e8b-a509-50c905bae2a2}\InprocServer32" /f /ve
```
重启后即可恢复 Win10 风格的右键菜单。
下面这条命令用于恢复 Win11 默认的右键菜单:
```bat
reg.exe delete "HKCU\Software\Classes\CLSID\{86ca1aa0-34aa-4e8b-a509-50c905bae2a2}\InprocServer32" /va /f
```