Examples
Standalone Python scripts covering model loading, fine-tuning, RL training, and vision models. Run any example directly from the repository root with python examples/<filename>.py.
Basics
01 — Simple Loading
Load a model from HuggingFace with FastLanguageModel.from_pretrained().
from mlx_tune import FastLanguageModel

# 4-bit MLX community build of Llama 3.2 1B Instruct on the HuggingFace Hub.
MODEL_ID = "mlx-community/Llama-3.2-1B-Instruct-4bit"

# Fetch (or reuse a cached copy of) the model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    MODEL_ID,
    max_seq_length=2048,  # maximum context length to configure
    load_in_4bit=True,    # keep the quantized weights to save memory
)
print("Model loaded successfully!")
02 — LoRA Configuration
Add LoRA adapters with get_peft_model() for parameter-efficient training.
from mlx_tune import FastLanguageModel

# Load the quantized base model and its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    "mlx-community/Llama-3.2-1B-Instruct-4bit",
    max_seq_length=2048, load_in_4bit=True,
)

# Keep the rank in one place so the adapter config and the log line
# below cannot drift apart (the original printed a hard-coded f"{16}").
lora_rank = 16

# Wrap the attention projections with LoRA adapters for
# parameter-efficient training.
model = FastLanguageModel.get_peft_model(
    model,
    r=lora_rank,  # LoRA rank (adapter dimensionality)
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_alpha=16,
    lora_dropout=0,
)
print(f"LoRA configured: rank={lora_rank}")
03 — Inference
Generate text with a loaded model using for_inference().
from mlx_tune import FastLanguageModel
from mlx_lm import generate

# Load the quantized base model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    "mlx-community/Llama-3.2-1B-Instruct-4bit",
    max_seq_length=2048, load_in_4bit=True,
)

# Switch the wrapper into inference mode before generating.
FastLanguageModel.for_inference(model)

question = "What is machine learning?"
answer = generate(
    model.model,  # generate() wants the underlying mlx_lm model
    tokenizer,
    prompt=question,
    max_tokens=100,
)
print(answer)
SFT Training
04 — Simple Fine-tuning
LoRA setup and basic training configuration walkthrough.
from mlx_tune import FastLanguageModel, SFTTrainer, SFTConfig

# Load the 4-bit base model and its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    "mlx-community/Llama-3.2-1B-Instruct-4bit",
    max_seq_length=2048,
    load_in_4bit=True,
)

# Attach rank-16 LoRA adapters for parameter-efficient fine-tuning;
# training itself proceeds with SFTTrainer / SFTConfig as in the
# complete-workflow example.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
)
05 — Complete Workflow
Full pipeline: load, configure, train, and save a fine-tuned model.
from mlx_tune import FastLanguageModel, SFTTrainer, SFTConfig
from datasets import load_dataset

# 1. Load the quantized base model and tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    "mlx-community/Llama-3.2-1B-Instruct-4bit",
    max_seq_length=2048, load_in_4bit=True,
)

# 2. Attach LoRA adapters for parameter-efficient training.
model = FastLanguageModel.get_peft_model(model, r=16, lora_alpha=16)

# 3. Pull a 100-sample slice of the Alpaca dataset for a quick run.
dataset = load_dataset("yahma/alpaca-cleaned", split="train[:100]")

# 4. Configure and run supervised fine-tuning.
training_args = SFTConfig(
    output_dir="outputs",
    max_steps=50,
    learning_rate=2e-4,
)
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    tokenizer=tokenizer,
    args=training_args,
)
trainer.train()

# 5. Persist the trained LoRA adapters.
model.save_pretrained("lora_model")
06 — Real Training Test
Actual training run with SFTTrainer on a small dataset with logging.
from mlx_tune import FastLanguageModel, SFTTrainer, SFTConfig
# Abridged example: the full script loads the model, configures LoRA,
# and runs a real SFTTrainer pass on a small dataset, using
# logging_steps=1 so the training loss is reported after every step.
# See the full example source for the dataset-preparation details.
07 — Unsloth Comparison
Side-by-side comparison of Unsloth vs MLX-Tune API usage.
# Shows Unsloth code (commented) alongside MLX-Tune equivalent
# Demonstrates that the API is 100% compatible
# See workflow page for detailed translation guide
08 — Exact Unsloth Pipeline
Complete Unsloth-compatible SFT workflow with chat templates, response masking, and export.
from mlx_tune import FastLanguageModel, SFTTrainer, SFTConfig
from mlx_tune import get_chat_template, train_on_responses_only
from datasets import load_dataset

# Load the 4-bit base model plus tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    "mlx-community/Llama-3.2-1B-Instruct-4bit",
    max_seq_length=2048, load_in_4bit=True,
)

# Attach rank-16 LoRA adapters.
model = FastLanguageModel.get_peft_model(model, r=16, lora_alpha=16)

# Apply the Llama-3 chat template to the tokenizer.
tokenizer = get_chat_template(tokenizer, chat_template="llama-3")

# Small Alpaca slice for a quick demonstration run.
dataset = load_dataset("yahma/alpaca-cleaned", split="train[:100]")

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    tokenizer=tokenizer,
    args=SFTConfig(output_dir="outputs", max_steps=50, learning_rate=2e-4),
)

# Compute the loss on assistant responses only; the marker strings
# below must match the llama-3 chat template headers exactly.
trainer = train_on_responses_only(
    trainer,
    instruction_part="<|start_header_id|>user<|end_header_id|>\n\n",
    response_part="<|start_header_id|>assistant<|end_header_id|>\n\n",
)

trainer.train()

# Save the LoRA adapters alone, then a fully merged model + tokenizer.
model.save_pretrained("lora_model")
model.save_pretrained_merged("merged", tokenizer)
RL Methods
09 — RL Training Methods
DPO, ORPO, GRPO, KTO, and SimPO trainer demonstrations with preference datasets.
# Illustrative fragment: `model`, `dpo_dataset`, and `grpo_dataset` are
# assumed to be created earlier (load a model as in the examples above
# and supply preference-formatted datasets) — this snippet shows only
# the trainer construction and training calls.
from mlx_tune import (
DPOTrainer, DPOConfig,
ORPOTrainer, ORPOConfig,
GRPOTrainer, GRPOConfig,
KTOTrainer, SimPOTrainer,
)
# DPO: Direct Preference Optimization on a chosen/rejected pair dataset.
trainer = DPOTrainer(
model=model, train_dataset=dpo_dataset,
args=DPOConfig(output_dir="dpo_output", max_steps=50),
)
trainer.train()
# GRPO: Group Relative Policy Optimization (DeepSeek R1 style).
# ORPOTrainer, KTOTrainer, and SimPOTrainer follow the same pattern.
trainer = GRPOTrainer(
model=model, train_dataset=grpo_dataset,
args=GRPOConfig(output_dir="grpo_output", max_steps=50),
)
trainer.train()
Vision Models
10 — Qwen3.5 Vision Fine-tuning
Fine-tune a vision-language model on image+text data (LaTeX OCR dataset).
from mlx_tune import FastVisionModel, UnslothVisionDataCollator, VLMSFTTrainer
from mlx_tune.vlm import VLMSFTConfig
from datasets import load_dataset

# Load the vision-language model and its processor (tokenizer plus image
# preprocessing); 4-bit loading is disabled for this bf16 checkpoint.
model, processor = FastVisionModel.from_pretrained(
    "mlx-community/Qwen3.5-0.8B-bf16", load_in_4bit=False,
)

# Attach LoRA adapters to both the vision and the language layers.
model = FastVisionModel.get_peft_model(
    model,
    finetune_vision_layers=True,
    finetune_language_layers=True,
    r=16,
    lora_alpha=16,
)

# Image + LaTeX transcription pairs.
dataset = load_dataset("unsloth/LaTeX_OCR", split="train")

FastVisionModel.for_training(model)

trainer = VLMSFTTrainer(
    model=model,
    tokenizer=processor,
    data_collator=UnslothVisionDataCollator(model, processor),
    # Fix: the original snippet passed an undefined `converted_dataset`
    # (NameError). Pass the loaded dataset; convert your samples to the
    # collator's conversation format first if they are not already in it.
    train_dataset=dataset,
    args=VLMSFTConfig(max_steps=30, learning_rate=2e-4),
)
trainer.train()
11 — Qwen3.5 Text Fine-tuning
Fine-tune Qwen3.5 on text-only data without requiring any images.
from mlx_tune import FastVisionModel, UnslothVisionDataCollator, VLMSFTTrainer
from mlx_tune.vlm import VLMSFTConfig
# Qwen3.5 can be fine-tuned on text-only data: no images are required,
# since the model also handles pure text conversations.
model, processor = FastVisionModel.from_pretrained(
"mlx-community/Qwen3.5-0.8B-bf16", load_in_4bit=False,
)
# ... then the same training setup as the vision example above,
# just without any image data in the samples.