Description
System Info
Reproduction
import unsloth
from datetime import datetime
import swanlab
import torch
from peft import LoraConfig, TaskType, get_peft_model
from transformers import set_seed, EarlyStoppingCallback, AutoModelForCausalLM, AutoTokenizer
from transformers.training_args import OptimizerNames, SchedulerType
from trl import SFTConfig, SFTTrainer
from data_processor import DataProcessor, DatasetConfigBuilder
torch.cuda.set_per_process_memory_fraction(0.7)
# Project path
# PROJECT_PATH = '/home/btcc/PycharmProjects/llmProject'
PROJECT_PATH = '/workspace'
# Model path
MODEL_PATH = f'{PROJECT_PATH}/fine-tuned-model/full_text_generate/20260121063342'
# MODEL_PATH = f'{PROJECT_PATH}/models/Qwen/Qwen3-8B'
SAVE_PATH = f'{PROJECT_PATH}/fine-tuned-model/full_text_generate'
# Dataset path
DATA_PATH_JP = f'{PROJECT_PATH}/data/diamond/train.jsonl'
# Prompt paths
SYSTEM_PROMPT_PATH = f'{PROJECT_PATH}/prompt/train/full_text_model/system.md'
USER_PROMPT_PATH = f'{PROJECT_PATH}/prompt/train/full_text_model/user.md'
# Set the random seed
set_seed(42)
# Log in to SwanLab
swanlab.login(api_key="")
# Define the list of dataset configurations
raw_configs = [
    {'path': DATA_PATH_JP, 'system_prompt_path': SYSTEM_PROMPT_PATH, 'user_prompt_path': USER_PROMPT_PATH},
    # {'path': DATA_PATH_RU, 'system_prompt_path': SYSTEM_PROMPT_PATH, 'user_prompt_path': USER_PROMPT_PATH},
    # {'path': DATA_PATH_EN_AU, 'system_prompt_path': SYSTEM_PROMPT_PATH, 'user_prompt_path': USER_PROMPT_PATH}
]
# Build the final configs with the config builder
dataset_configs = DatasetConfigBuilder.build_configs(raw_configs)
# Training configuration and launch
def train_model(model, tokenizer, train_dataset, eval_dataset):
    # Training configuration
    train_args = SFTConfig(
        output_dir="outPut",
        eval_strategy="steps",
        eval_steps=10,
        per_device_train_batch_size=1,
        per_device_eval_batch_size=1,
        gradient_accumulation_steps=16,
        save_steps=500,
        num_train_epochs=3,
        logging_steps=1,
        learning_rate=2e-4,
        lr_scheduler_type=SchedulerType.COSINE,
        warmup_ratio=0.1,
        save_on_each_node=True,
        gradient_checkpointing=True,
        optim=OptimizerNames.PAGED_ADAMW_8BIT,
        fp16=False,
        bf16=True,
        # dataloader_num_workers=0,  # number of data-loading workers
        dataloader_pin_memory=True,  # enable pinned memory
        remove_unused_columns=True,
        # ddp_find_unused_parameters=False,
        load_best_model_at_end=True,
        seed=42,
        report_to="swanlab",
        completion_only_loss=True,
        max_length=tokenizer.model_max_length
    )
    trainer = SFTTrainer(
        model=model,
        args=train_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        processing_class=tokenizer,
        callbacks=[
            EarlyStoppingCallback(
                early_stopping_patience=3
            )
        ],
    )
    trainer_stats = unsloth.unsloth_train(trainer)
    return trainer_stats
if __name__ == "__main__":
    model, tokenizer = unsloth.FastLanguageModel.from_pretrained(
        MODEL_PATH,
        load_in_4bit=False,
        dtype=torch.bfloat16,
        device_map="auto",
        max_seq_length=131072,
        # max_seq_length=40960,
        local_files_only=True,
    )
    print(model.device)
    data_processor = DataProcessor(dataset_configs=dataset_configs)
    train_dataset, eval_dataset = data_processor.load_and_process_data(tokenizer)
    model = unsloth.FastLanguageModel.get_peft_model(
        model,
        r=32,
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ],
        lora_alpha=64,
        lora_dropout=0.05,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,
    )
    model.print_trainable_parameters()
    trainer_stats = train_model(model, tokenizer, train_dataset, eval_dataset)
    save_path = f"{SAVE_PATH}/{datetime.now().strftime('%Y%m%d%H%M%S')}"
    model.save_pretrained(f"{save_path}/lora_adapter", safe_serialization=True)
    tokenizer.save_pretrained(save_path)
    merged_model = model.merge_and_unload()
    merged_model.save_pretrained(save_path, safe_serialization=True)
    swanlab.finish()

Expected behavior
root@73dbc62a3976:/workspace# python sft_train.py
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
/usr/local/lib/python3.12/dist-packages/torch/library.py:356: UserWarning: Warning only once for all operators, other operators may also be overridden.
Overriding a previously registered kernel for the same operator and the same dispatch key
operator: flash_attn::_flash_attn_backward(Tensor dout, Tensor q, Tensor k, Tensor v, Tensor out, Tensor softmax_lse, Tensor(a6!)? dq, Tensor(a7!)? dk, Tensor(a8!)? dv, float dropout_p, float softmax_scale, bool causal, SymInt window_size_left, SymInt window_size_right, float softcap, Tensor? alibi_slopes, bool deterministic, Tensor? rng_state=None) -> Tensor
registered at /usr/local/lib/python3.12/dist-packages/torch/_library/custom_ops.py:922
dispatch key: ADInplaceOrView
previous kernel: no debug info
new kernel: registered at /usr/local/lib/python3.12/dist-packages/torch/_library/custom_ops.py:922 (Triggered internally at /opt/pytorch/pytorch/aten/src/ATen/core/dispatch/OperatorEntry.cpp:208.)
self.m.impl(
[bitsandbytes.cextension|ERROR]bitsandbytes library load error: Configured CUDA binary not found at /usr/local/lib/python3.12/dist-packages/bitsandbytes/libbitsandbytes_cuda131.so
Traceback (most recent call last):
File "/usr/local/lib/python3.12/dist-packages/bitsandbytes/cextension.py", line 320, in
lib = get_native_library()
^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/bitsandbytes/cextension.py", line 288, in get_native_library
raise RuntimeError(f"Configured {BNB_BACKEND} binary not found at {cuda_binary_path}")
RuntimeError: Configured CUDA binary not found at /usr/local/lib/python3.12/dist-packages/bitsandbytes/libbitsandbytes_cuda131.so
0%| | 0/63 [00:00<?, ?it/s]
File "/workspace/sft_train.py", line 193, in
trainer_stats = train_model(model, tokenizer, train_dataset, eval_dataset)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/workspace/sft_train.py", line 96, in train_model
trainer_stats = unsloth.unsloth_train(trainer)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/unsloth/trainer.py", line 108, in unsloth_train
return trainer.train(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/workspace/unsloth_compiled_cache/UnslothSFTTrainer.py", line 64, in wrapper
output = f(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/transformers/trainer.py", line 2325, in train
return inner_training_loop(
^^^^^^^^^^^^^^^^^^^^
File "", line 396, in _fast_inner_training_loop
File "/usr/local/lib/python3.12/dist-packages/accelerate/optimizer.py", line 179, in step
self.optimizer.step(closure)
File "/usr/local/lib/python3.12/dist-packages/torch/optim/lr_scheduler.py", line 173, in wrapper
return func.__get__(opt, opt.__class__)(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/torch/optim/optimizer.py", line 526, in wrapper
out = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/torch/utils/_contextlib.py", line 124, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/bitsandbytes/optim/optimizer.py", line 325, in step
self.init_state(group, p, gindex, pindex)
File "/usr/local/lib/python3.12/dist-packages/torch/utils/_contextlib.py", line 124, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/bitsandbytes/optim/optimizer.py", line 507, in init_state
state["state1"] = self.get_state_buffer(p, dtype=torch.uint8)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/bitsandbytes/optim/optimizer.py", line 368, in get_state_buffer
buff = F.get_paged(*p.shape, dtype=dtype, device=p.device)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/bitsandbytes/functional.py", line 136, in get_paged
cuda_ptr = lib.cget_managed_ptr(ct.c_size_t(num_bytes))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/bitsandbytes/cextension.py", line 269, in throw_on_call
raise RuntimeError(f"{self.formatted_error}Native code method attempted to call: lib.{name}()")
🚨 Forgot to compile the bitsandbytes library? 🚨
- You're not using the package but checked-out the source code
- You MUST compile from source
Attempted to use bitsandbytes native library functionality but it's not available.
This typically happens when:
- bitsandbytes doesn't ship with a pre-compiled binary for your CUDA version
- The library wasn't compiled properly during installation from source
To make bitsandbytes work, the compiled library version MUST exactly match the linked CUDA version.
If your CUDA version doesn't have a pre-compiled binary, you MUST compile from source.
You have two options:
- COMPILE FROM SOURCE (required if no binary exists):
  https://huggingface.co/docs/bitsandbytes/main/en/installation#cuda-compile
- Use BNB_CUDA_VERSION to specify a DIFFERENT CUDA version from the detected one, which is installed on your machine and matching an available pre-compiled version listed above
Original error: Configured CUDA binary not found at /usr/local/lib/python3.12/dist-packages/bitsandbytes/libbitsandbytes_cuda131.so
🔍 Run this command for detailed diagnostics:
python -m bitsandbytes
If you've tried everything and still have issues:
- Include ALL version info (operating system, bitsandbytes, pytorch, cuda, python)
- Describe what you've tried in detail
- Open an issue with this information:
https://github.com/bitsandbytes-foundation/bitsandbytes/issues
Native code method attempted to call: lib.cget_managed_ptr()
0%| | 0/63 [07:41<?, ?it/s]
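
Additional context (my reading, not verified): the crash happens at the first optimizer step because optim=OptimizerNames.PAGED_ADAMW_8BIT routes the optimizer state through bitsandbytes, and the installed wheel has no libbitsandbytes_cuda131.so for the detected toolkit (the filename appears to correspond to CUDA 13.1). The sketch below is only a guess at two possible workarounds, not a confirmed fix: the BNB_CUDA_VERSION override is the one suggested by the error message itself (the "126" value is an assumption and only helps if that binary actually ships with the installed wheel, as reported by python -m bitsandbytes), and switching to a pure-PyTorch optimizer is an alternative that avoids bitsandbytes' native kernels entirely.

# Workaround sketch only; the specific values are assumptions, not a verified fix.
import os

# Option 1: override the CUDA version bitsandbytes looks for.
# Must be set before bitsandbytes is imported (i.e. before `import unsloth`).
# "126" assumes libbitsandbytes_cuda126.so is present in the installed wheel;
# adjust it to whatever `python -m bitsandbytes` lists as available.
os.environ["BNB_CUDA_VERSION"] = "126"

import unsloth  # bitsandbytes is loaded during this import

# Option 2: sidestep bitsandbytes by using a pure-PyTorch optimizer instead of
# the paged 8-bit AdamW, e.g. in the SFTConfig above:
#     optim=OptimizerNames.ADAMW_TORCH,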