30 changes: 16 additions & 14 deletions tests/test_dpo_trainer.py
@@ -162,7 +162,7 @@ def test_tokenize_row_with_truncation_and_special_tokens(self):
class TestDPOTrainer(TrlTestCase):
def setup_method(self):
self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
- self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
+ self.model = AutoModelForCausalLM.from_pretrained(self.model_id, dtype="float32")
self.ref_model = AutoModelForCausalLM.from_pretrained(self.model_id)
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
self.tokenizer.pad_token = self.tokenizer.eos_token
@@ -248,7 +248,7 @@ def test_train_loss_types(self, loss_type):
@require_liger_kernel
def test_train_encoder_decoder_liger(self):
model_id = "trl-internal-testing/tiny-BartModel"
- model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_id, dtype="float32")
dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
tokenizer = AutoTokenizer.from_pretrained(model_id)

@@ -749,7 +749,7 @@ def test_dpo_lora_tags(self):
)

# lora model
- model = AutoModelForCausalLM.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, dtype="float32")

training_args = DPOConfig(
output_dir=self.tmp_dir,
@@ -785,7 +785,7 @@ def test_dpo_tags(self):
tokenizer = AutoTokenizer.from_pretrained(model_id)

# lora model
- model = AutoModelForCausalLM.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, dtype="float32")

training_args = DPOConfig(
output_dir=self.tmp_dir,
@@ -885,7 +885,7 @@ def test_dpo_loss_alpha_div_f(self):
tokenizer = AutoTokenizer.from_pretrained(model_id)

# lora model
- model = AutoModelForCausalLM.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, dtype="float32")
training_args = DPOConfig(
output_dir=self.tmp_dir,
per_device_train_batch_size=2,
@@ -926,7 +926,7 @@ def test_dpo_loss_js_div_f(self):
tokenizer = AutoTokenizer.from_pretrained(model_id)

# lora model
- model = AutoModelForCausalLM.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, dtype="float32")

training_args = DPOConfig(
output_dir=self.tmp_dir,
@@ -968,7 +968,7 @@ def test_dpo_trainer_use_logits_to_keep(self):
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

- model = AutoModelForCausalLM.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, dtype="float32")

training_args = DPOConfig(
output_dir=self.tmp_dir,
@@ -1047,7 +1047,7 @@ def test_dpo_trainer_with_tools(self):
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

- model = AutoModelForCausalLM.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, dtype="float32")

# Define dummy test tools
def get_current_temperature(location: str):
@@ -1086,7 +1086,7 @@ def test_padding_free(self):
# Normally, we need `attn_implementation="flash_attention_2"` so that the model returns correct logits.
# Without it, the logits may be incorrect, but that's fine here. This test focuses only on the inner logic
# of padding_free.
- model = AutoModelForCausalLM.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, dtype="float32")

training_args = DPOConfig(
output_dir=self.tmp_dir,
@@ -1116,7 +1116,7 @@ def test_padding_free(self):
assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12)

def test_compute_metrics(self):
model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", dtype="float32")
ref_model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
tokenizer.pad_token = tokenizer.eos_token
@@ -1346,7 +1346,7 @@ def test_vdpo_trainer(self, model_id):
dataset = dataset.cast_column("images", features.Sequence(features.Image()))

# Instantiate the model and processor
- model = AutoModelForImageTextToText.from_pretrained(model_id)
+ model = AutoModelForImageTextToText.from_pretrained(model_id, dtype="float32")
ref_model = AutoModelForImageTextToText.from_pretrained(model_id)
processor = AutoProcessor.from_pretrained(model_id)

@@ -1444,7 +1444,7 @@ def test_dpo_bare_model(self, model_id, loss_type, pre_compute_logits):
"""
A test that tests the simple usage of `DPOTrainer` using a bare model in full precision.
"""
- model = AutoModelForCausalLM.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, dtype="float32")
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token if tokenizer.pad_token is None else tokenizer.pad_token

@@ -1500,7 +1500,7 @@ def test_dpo_peft_model(self, model_id, loss_type, pre_compute_logits, gradient_
A test that tests the simple usage of `DPOTrainer` using a peft model in full precision + different scenarios
of gradient checkpointing.
"""
- model = AutoModelForCausalLM.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, dtype="float32")
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token if tokenizer.pad_token is None else tokenizer.pad_token

@@ -1566,7 +1566,9 @@ def test_dpo_peft_model_qlora(self, model_id, loss_type, pre_compute_logits, gra
"""
quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

- model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id, dtype="float32", quantization_config=quantization_config
+ )
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token if tokenizer.pad_token is None else tokenizer.pad_token

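
Note: the change repeated across these hunks is a single keyword — every test-time `from_pretrained` call now pins the model to full precision with `dtype="float32"`. A minimal sketch of that pattern on its own, assuming a transformers version that accepts `dtype` in `from_pretrained` (older releases spell it `torch_dtype`); the checkpoint id is the tiny test model used above, and the final assertion is purely illustrative:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
    # Load the tiny test checkpoint with an explicit dtype, as the tests now do.
    model = AutoModelForCausalLM.from_pretrained(model_id, dtype="float32")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.pad_token = tokenizer.eos_token

    # Every floating-point parameter should now be float32, regardless of the
    # dtype stored in the checkpoint on the Hub.
    assert all(p.dtype == torch.float32 for p in model.parameters() if p.is_floating_point())
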
8 changes: 5 additions & 3 deletions tests/test_grpo_trainer.py
@@ -1093,7 +1093,9 @@ def test_training_with_entropy_filter(self):
@pytest.mark.skip(reason="We should add a mock for the vLLM server.")
def test_training_vllm_and_peft(self):
"""Test that training works with vLLM for generation."""
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct") # tiny model is too small for vLLM
model = AutoModelForCausalLM.from_pretrained(
"Qwen/Qwen2.5-0.5B-Instruct", dtype="float32"
) # tiny model is too small for vLLM
base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()]
dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")

@@ -1769,7 +1771,7 @@ def reward_func(completions, **kwargs):
@require_vision
@require_peft
def test_training_vlm_peft(self, model_id):
- model = AutoModelForImageTextToText.from_pretrained(model_id)
+ model = AutoModelForImageTextToText.from_pretrained(model_id, dtype="float32")
base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()]
dataset = load_dataset("trl-internal-testing/zen-image", "conversational_prompt_only", split="train")

@@ -2305,7 +2307,7 @@ def test_training_with_liger_grpo_kernel_and_peft(self, model_name):
logging_strategy="no",
)

- model = AutoModelForCausalLM.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name, dtype="float32")
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token if tokenizer.pad_token is None else tokenizer.pad_token

8 changes: 4 additions & 4 deletions tests/test_reward_trainer.py
@@ -265,7 +265,7 @@ def test_train_model_dtype(self):
def test_train_dense_with_peft_config(self):
# Get the base model parameter names
model_id = "trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5"
- model = AutoModelForSequenceClassification.from_pretrained(model_id)
+ model = AutoModelForSequenceClassification.from_pretrained(model_id, dtype="float32")
base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()]

# Get the dataset
@@ -302,7 +302,7 @@ def test_train_dense_with_peft_config(self):
def test_train_moe_with_peft_config(self):
# Get the base model parameter names
model_id = "trl-internal-testing/tiny-Qwen3MoeForSequenceClassification"
- model = AutoModelForSequenceClassification.from_pretrained(model_id)
+ model = AutoModelForSequenceClassification.from_pretrained(model_id, dtype="float32")
base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()]

# Get the dataset
@@ -339,7 +339,7 @@ def test_train_moe_with_peft_config(self):
def test_train_peft_model(self):
# Get the base model
model_id = "trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5"
- model = AutoModelForSequenceClassification.from_pretrained(model_id)
+ model = AutoModelForSequenceClassification.from_pretrained(model_id, dtype="float32")

# Get the base model parameter names
base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()]
@@ -379,7 +379,7 @@ def test_train_peft_model(self):
def test_train_with_peft_config_and_gradient_checkpointing(self):
# Get the base model parameter names
model_id = "trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5"
- model = AutoModelForSequenceClassification.from_pretrained(model_id)
+ model = AutoModelForSequenceClassification.from_pretrained(model_id, dtype="float32")
base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()]

# Get the dataset
6 changes: 4 additions & 2 deletions tests/test_rloo_trainer.py
@@ -683,7 +683,9 @@ def test_training_beta_zero(self):
@pytest.mark.skip(reason="We should add a mock for the vLLM server.")
def test_training_vllm_and_peft(self):
"""Test that training works with vLLM for generation."""
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct") # tiny model is too small for vLLM
model = AutoModelForCausalLM.from_pretrained(
"Qwen/Qwen2.5-0.5B-Instruct", dtype="float32"
) # tiny model is too small for vLLM
base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()]
dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")

@@ -1284,7 +1286,7 @@ def reward_func(completions, **kwargs):
@require_vision
@require_peft
def test_training_vlm_peft(self, model_id):
- model = AutoModelForImageTextToText.from_pretrained(model_id)
+ model = AutoModelForImageTextToText.from_pretrained(model_id, dtype="float32")
base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()]
dataset = load_dataset("trl-internal-testing/zen-image", "conversational_prompt_only", split="train")

20 changes: 10 additions & 10 deletions tests/test_sft_trainer.py
@@ -480,7 +480,7 @@ def test_train_model_dtype(self):
def test_train_dense_with_peft_config_lora(self):
# Get the base model parameter names
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
- model = AutoModelForCausalLM.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, dtype="float32")
base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()]

# Get the dataset
@@ -525,7 +525,7 @@ def test_train_dense_with_peft_config_lora(self):
def test_train_with_peft_config_prompt_tuning(self, peft_type):
# Get the base model parameter names
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
- model = AutoModelForCausalLM.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, dtype="float32")
base_param_names = [f"base_model.{n}" for n, _ in model.named_parameters()]

# Get the dataset
@@ -583,7 +583,7 @@ def test_train_with_peft_config_prompt_tuning(self, peft_type):
def test_train_moe_with_peft_config(self):
# Get the base model parameter names
model_id = "trl-internal-testing/tiny-GptOssForCausalLM"
- model = AutoModelForCausalLM.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, dtype="float32")
base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()]

# Get the dataset
@@ -620,7 +620,7 @@ def test_train_moe_with_peft_config(self):
def test_train_peft_model(self):
# Get the base model
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
- model = AutoModelForCausalLM.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, dtype="float32")

# Get the base model parameter names
base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()]
@@ -660,7 +660,7 @@ def test_train_peft_model(self):
def test_train_with_peft_config_and_gradient_checkpointing(self):
# Get the base model parameter names
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
- model = AutoModelForCausalLM.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, dtype="float32")
base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()]

# Get the dataset
@@ -1596,7 +1596,7 @@ def test_peft_with_quantization(self):
@require_peft
def test_prompt_tuning_peft_model(self):
"""Test that SFT works with Prompt Tuning and a pre-converted PeftModel"""
model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", dtype="float32")
model = get_peft_model(model, PromptEncoderConfig(task_type=TaskType.CAUSAL_LM, num_virtual_tokens=8))

dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train")
@@ -1698,7 +1698,7 @@ def test_sft_trainer_transformers(self, model_name, packing):
max_length=self.max_length,
)

- model = AutoModelForCausalLM.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name, dtype="float32")
tokenizer = AutoTokenizer.from_pretrained(model_name)

trainer = SFTTrainer(
@@ -1738,7 +1738,7 @@ def test_sft_trainer_peft(self, model_name, packing):
max_length=self.max_length,
)

- model = AutoModelForCausalLM.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name, dtype="float32")
tokenizer = AutoTokenizer.from_pretrained(model_name)

trainer = SFTTrainer(
@@ -1869,7 +1869,7 @@ def test_sft_trainer_transformers_mp_gc_peft(self, model_name, packing, gradient
gradient_checkpointing_kwargs=gradient_checkpointing_kwargs,
)

- model = AutoModelForCausalLM.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name, dtype="float32")
tokenizer = AutoTokenizer.from_pretrained(model_name)

trainer = SFTTrainer(
@@ -1920,7 +1920,7 @@ def test_sft_trainer_transformers_mp_gc_device_map(
gradient_checkpointing_kwargs=gradient_checkpointing_kwargs,
)

- model = AutoModelForCausalLM.from_pretrained(model_name, device_map=device_map)
+ model = AutoModelForCausalLM.from_pretrained(model_name, dtype="float32", device_map=device_map)
tokenizer = AutoTokenizer.from_pretrained(model_name)

trainer = SFTTrainer(
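
Note: as the QLoRA and device_map hunks show, the explicit dtype composes with the other `from_pretrained` options these tests already pass. A hedged sketch of the combined call, assuming bitsandbytes and a CUDA device are available; the checkpoint id and `device_map="auto"` are illustrative stand-ins for the parametrized values in the tests:

    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    # 4-bit quantization config copied from the DPO QLoRA test above.
    quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

    # dtype="float32" applies to the modules that stay unquantized (embeddings, norms, head);
    # the 4-bit linear weights keep the layout defined by BitsAndBytesConfig.
    model = AutoModelForCausalLM.from_pretrained(
        "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
        dtype="float32",
        quantization_config=quantization_config,
        device_map="auto",
    )
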