# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from dataclasses import dataclass, field
from typing import Optional

import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    GPTQConfig,
    HfArgumentParser,
    TrainingArguments,
)
from trl import SFTTrainer
# This example fine-tunes the Llama 2 model on the Guanaco dataset
# using GPTQ and PEFT.
# Use it by passing the --model_name argument when running the
# script. The default model is ybelkada/llama-7b-GPTQ-test.
# Versions used:
# accelerate == 0.21.0
# auto-gptq == 0.4.2
# trl == 0.4.7
# peft from source
# transformers from source
# optimum from source
# For models that have `config.pretraining_tp > 1`, install transformers from source:
# pip install git+https://github.com/huggingface/transformers.git
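# Example invocation (illustrative only; the script file name and the
# argument values below are assumptions, not part of the original gist):
#   python gptq_peft_finetune.py \
#       --model_name ybelkada/llama-7b-GPTQ-test \
#       --per_device_train_batch_size 4 \
#       --max_steps 500 \
#       --bf16 True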
@dataclass
class ScriptArguments:
    """
    These arguments vary depending on how many GPUs you have, what their capacity and features are, and what size model you want to train.
    """

    local_rank: Optional[int] = field(default=-1, metadata={"help": "Used for multi-gpu"})
    per_device_train_batch_size: Optional[int] = field(default=4)
    per_device_eval_batch_size: Optional[int] = field(default=1)
    gradient_accumulation_steps: Optional[int] = field(default=4)
    learning_rate: Optional[float] = field(default=2e-4)
    max_grad_norm: Optional[float] = field(default=0.3)
    weight_decay: Optional[float] = field(default=0.001)
    lora_alpha: Optional[int] = field(default=16)
    lora_dropout: Optional[float] = field(default=0.1)
    lora_r: Optional[int] = field(default=64)
    max_seq_length: Optional[int] = field(default=512)
    model_name: Optional[str] = field(
        default="ybelkada/llama-7b-GPTQ-test",
        metadata={
            "help": "The model that you want to train from the Hugging Face hub. E.g. gpt2, gpt2-xl, bert, etc."
        },
    )
    dataset_name: Optional[str] = field(
        default="timdettmers/openassistant-guanaco",
        metadata={"help": "The preference dataset to use."},
    )
    num_train_epochs: Optional[int] = field(
        default=1,
        metadata={"help": "The number of training epochs for the reward model."},
    )
    fp16: Optional[bool] = field(
        default=False,
        metadata={"help": "Enables fp16 training."},
    )
    bf16: Optional[bool] = field(
        default=False,
        metadata={"help": "Enables bf16 training."},
    )
    packing: Optional[bool] = field(
        default=False,
        metadata={"help": "Use packing when creating the dataset."},
    )
    gradient_checkpointing: Optional[bool] = field(
        default=True,
        metadata={"help": "Enables gradient checkpointing."},
    )
    optim: Optional[str] = field(
        default="adamw_hf",
        metadata={"help": "The optimizer to use."},
    )
    lr_scheduler_type: str = field(
        default="constant",
        metadata={"help": "Learning rate schedule. Constant is a bit better than cosine and has an advantage for analysis."},
    )
    max_steps: int = field(default=10000, metadata={"help": "How many optimizer update steps to take"})
    warmup_ratio: float = field(default=0.03, metadata={"help": "Fraction of steps to do a warmup for"})
    group_by_length: bool = field(
        default=True,
        metadata={
            "help": "Group sequences into batches with the same length. Saves memory and speeds up training considerably."
        },
    )
    save_steps: int = field(default=10, metadata={"help": "Save checkpoint every X update steps."})
    logging_steps: int = field(default=10, metadata={"help": "Log every X update steps."})
    merge_and_push: Optional[bool] = field(
        default=False,
        metadata={"help": "Merge and push weights after training."},
    )
    output_dir: str = field(
        default="./results",
        metadata={"help": "The output directory where the model predictions and checkpoints will be written."},
    )
parser = HfArgumentParser(ScriptArguments)
script_args = parser.parse_args_into_dataclasses()[0]
def create_and_prepare_model(args):
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16, you can accelerate training with the argument --bf16")
        print("=" * 80)

    # Load the entire model on GPU 0.
    device_map = {"": 0}
    # Switch to `device_map = "auto"` for multi-GPU.
    # device_map = "auto"

    # The exllama kernel needs to be disabled: it is not very stable for training.
    model = AutoModelForCausalLM.from_pretrained(
        args.model_name,
        device_map=device_map,
        quantization_config=GPTQConfig(bits=4, disable_exllama=True),
    )

    # Check: https://github.com/huggingface/transformers/pull/24906
    model.config.pretraining_tp = 1

    peft_config = LoraConfig(
        lora_alpha=args.lora_alpha,
        lora_dropout=args.lora_dropout,
        r=args.lora_r,
        bias="none",
        task_type="CAUSAL_LM",
    )

    tokenizer = AutoTokenizer.from_pretrained(args.model_name, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token

    return model, peft_config, tokenizer
training_arguments = TrainingArguments(
    output_dir=script_args.output_dir,
    per_device_train_batch_size=script_args.per_device_train_batch_size,
    gradient_accumulation_steps=script_args.gradient_accumulation_steps,
    optim=script_args.optim,
    save_steps=script_args.save_steps,
    logging_steps=script_args.logging_steps,
    learning_rate=script_args.learning_rate,
    fp16=script_args.fp16,
    bf16=script_args.bf16,
    max_grad_norm=script_args.max_grad_norm,
    max_steps=script_args.max_steps,
    warmup_ratio=script_args.warmup_ratio,
    group_by_length=script_args.group_by_length,
    lr_scheduler_type=script_args.lr_scheduler_type,
)
model, peft_config, tokenizer = create_and_prepare_model(script_args)
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)
model.config.use_cache = False

dataset = load_dataset(script_args.dataset_name, split="train")

# Fix weird overflow issue with fp16 training
tokenizer.padding_side = "right"

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=script_args.max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=script_args.packing,
)

trainer.train()

if script_args.merge_and_push:
    output_dir = os.path.join(script_args.output_dir, "final_checkpoints")
    trainer.model.save_pretrained(output_dir)

    # Free memory for merging weights
    del model
    torch.cuda.empty_cache()
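The gist stops after freeing memory, so the merge step itself is not shown. PEFT cannot merge LoRA weights into GPTQ QuantLinear layers in place, so a common workaround is to merge the adapter into the unquantized base weights instead. A minimal sketch of how the script might continue (not the author's code; the base model name below is a placeholder for whatever full-precision model the GPTQ checkpoint was quantized from):

from peft import PeftModel

# Load the *unquantized* base model in half precision; the quantized
# GPTQ layers themselves cannot be merged.
base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # placeholder: full-precision base model
    torch_dtype=torch.float16,
    device_map="auto",
)
merged_model = PeftModel.from_pretrained(base_model, output_dir)
merged_model = merged_model.merge_and_unload()
merged_model.save_pretrained(os.path.join(script_args.output_dir, "merged_model"))

Note that an adapter trained against quantized weights may behave slightly differently once merged into the full-precision weights.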
Hi @glf1030, make sure you have the latest version of peft.
hi, my version is 0.7.1.
(llama_factory) [lifan@iZ0jld5hy53xg1wwoistghZ ~]$ pip show peft
Name: peft
Version: 0.7.1
Summary: Parameter-Efficient Fine-Tuning (PEFT)
Home-page: https://github.com/huggingface/peft
Author: The HuggingFace team
Author-email: sourab@huggingface.co
License: Apache
Location: /data/lifan/miniconda3/envs/llama_factory/lib/python3.9/site-packages
Requires: accelerate, huggingface-hub, numpy, packaging, psutil, pyyaml, safetensors, torch, tqdm, transformers
Required-by: auto-gptq
Working for me with a quantized model. Is it possible to merge the resulting adapter with the GPTQ base model? Any example for this?
@SunMarc
I am trying this with Llama 3.1 8B GPTQ. The model gets loaded unevenly across the GPUs, so I am not able to use a batch size above 1 on a machine with 4 A10 GPUs. huggingface/transformers#32199
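If `device_map="auto"` is spreading the layers unevenly, capping per-GPU memory sometimes helps the planner balance the shards. A minimal sketch, not a confirmed fix for the linked issue; the checkpoint name and the "20GiB" limits are placeholders to tune for 24 GB A10s:

import torch
from transformers import AutoModelForCausalLM, GPTQConfig

# Leave headroom below each card's 24 GB for activations, gradients, and
# optimizer state; the device_map planner then distributes layers more evenly.
model = AutoModelForCausalLM.from_pretrained(
    "hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4",  # placeholder GPTQ checkpoint
    device_map="auto",
    max_memory={i: "20GiB" for i in range(torch.cuda.device_count())},
    quantization_config=GPTQConfig(bits=4, disable_exllama=True),
)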
Hi, thanks for your reply. I passed target_modules=['c_attn'] and it works for training,
but for inference I used the following code:
============================= I got the following error ======================================
File "/data/lifan/miniconda3/envs/llama_factory/lib/python3.9/site-packages/peft/tuners/tuners_utils.py", line 90, in __init__
    self.inject_adapter(self.model, adapter_name)
File "/data/lifan/miniconda3/envs/llama_factory/lib/python3.9/site-packages/peft/tuners/tuners_utils.py", line 247, in inject_adapter
    self._create_and_replace(peft_config, adapter_name, target, target_name, parent, **optional_kwargs)
File "/data/lifan/miniconda3/envs/llama_factory/lib/python3.9/site-packages/peft/tuners/lora/model.py", line 202, in _create_and_replace
    new_module = self._create_new_module(lora_config, adapter_name, target, **kwargs)
File "/data/lifan/miniconda3/envs/llama_factory/lib/python3.9/site-packages/peft/tuners/lora/model.py", line 355, in _create_new_module
    raise ValueError(
ValueError: Target module QuantLinear() is not supported. Currently, only the following modules are supported: torch.nn.Linear, torch.nn.Embedding, torch.nn.Conv2d, transformers.pytorch_utils.Conv1D.

Would you please take a look at it? Best.
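For reference, a minimal inference-loading sketch that avoids this error with recent peft versions, assuming the adapter was saved by the script above (the model name and adapter path are placeholders). The key point is to load the GPTQ checkpoint through transformers with its quantization config, so PEFT dispatches its GPTQ-aware LoRA layers instead of rejecting QuantLinear:

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig

# Load the GPTQ base through transformers (not through auto-gptq directly)
# so PEFT recognizes the quantized layers when attaching the adapter.
model = AutoModelForCausalLM.from_pretrained(
    "ybelkada/llama-7b-GPTQ-test",  # placeholder: your GPTQ base model
    device_map={"": 0},
    quantization_config=GPTQConfig(bits=4, disable_exllama=True),
)
model = PeftModel.from_pretrained(model, "./results/final_checkpoints")  # placeholder adapter path
tokenizer = AutoTokenizer.from_pretrained("ybelkada/llama-7b-GPTQ-test")

inputs = tokenizer("Hello, my name is", return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))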