MLX is an array framework for machine learning on Apple silicon, designed by Apple machine learning research. It offers high performance, familiar APIs, and seamless integration with Apple's ecosystem.
- Familiar APIs: Python API closely following NumPy, with C++ and Swift interfaces
- Composable function transformations: For automatic differentiation, vectorization, and computation graph optimization
- Lazy computation: Arrays are only materialized when needed
- Dynamic graph construction: No slow recompilations when shapes change
- Unified memory model: Operations run on any device without data copies (see the sketch after this list)
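Lazy evaluation and the unified memory model in practice, as a minimal sketch (shapes are arbitrary):
import mlx.core as mx
# Arrays live in unified memory shared by the CPU and GPU
a = mx.random.normal((4096, 4096))
b = mx.random.normal((4096, 4096))
# Operations build a graph lazily; nothing is computed yet
c = mx.matmul(a, b)              # runs on the default device when evaluated
d = mx.add(a, b, stream=mx.cpu)  # same arrays on a CPU stream, no copies
# Computation happens when the results are needed
mx.eval(c, d)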
# Install MLX
pip install mlx
# Install MLX-LM for language models
pip install mlx-lm
import mlx.core as mx
# Create arrays
a = mx.array([1, 2, 3])
b = mx.zeros((3, 3))
c = mx.ones((2, 4))
d = mx.random.normal((2, 2))
# Basic operations
result = a + b
result = mx.matmul(b, b)
# Evaluate lazily computed arrays
mx.eval(result)
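Most NumPy-style manipulations carry over unchanged; a small sketch (values and shapes are arbitrary):
# Reshaping, slicing, dtype conversion, reductions, and joining
m = mx.arange(12).reshape(3, 4)
row = m[0]
col = m[:, 1]
f = m.astype(mx.float32)
total = mx.sum(f, axis=0)
stacked = mx.concatenate([m, m], axis=0)
mx.eval(row, col, total, stacked)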
import mlx.core as mx
# Gradient computation
def f(x):
    return mx.sum(x ** 2)
grad_f = mx.grad(f)
x = mx.array([1.0, 2.0, 3.0])
grad_value = grad_f(x) # [2.0, 4.0, 6.0]
# Vectorization
def scalar_fn(x):
    return x ** 2
vector_fn = mx.vmap(scalar_fn)
vector_fn(mx.array([1.0, 2.0, 3.0])) # [1.0, 4.0, 9.0]
# Combined transformations: per-element gradients of a scalar function
grad_vector_fn = mx.vmap(mx.grad(scalar_fn))
grad_vector_fn(mx.array([1.0, 2.0, 3.0]))  # [2.0, 4.0, 6.0]
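mx.value_and_grad returns the function value together with its gradient, avoiding a second forward pass; a minimal sketch with an arbitrary least-squares loss:
def lsq_loss(w, x, y):
    return mx.mean((x @ w - y) ** 2)

loss_and_grad = mx.value_and_grad(lsq_loss)  # differentiates w.r.t. the first argument
w = mx.zeros((3,))
x = mx.random.normal((8, 3))
y = mx.random.normal((8,))
value, dw = loss_and_grad(w, x, y)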
import mlx.core as mx
from functools import partial
@mx.compile
def optimized_fn(x):
    return mx.sum(x ** 2)
# With state tracking
state = [mx.array(1.0)]
@partial(mx.compile, inputs=state, outputs=state)
def stateful_fn(x):
    result = x + state[0]
    state[0] = result
    return result
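Compiled functions are called like the originals; continuing the snippet above, the captured state is carried across calls:
y = optimized_fn(mx.arange(4.0))
mx.eval(y)
out = stateful_fn(mx.array(2.0))
mx.eval(out, state)  # state[0] now holds 3.0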
import mlx.core as mx
import mlx.nn as nn
class MLP(nn.Module):
    def __init__(self, in_dims, hidden_dims, out_dims):
        super().__init__()
        self.layers = [
            nn.Linear(in_dims, hidden_dims),
            nn.Linear(hidden_dims, out_dims)
        ]

    def __call__(self, x):
        for layer in self.layers[:-1]:
            x = layer(x)
            x = mx.maximum(x, 0)  # ReLU activation
        return self.layers[-1](x)
# Create model
model = MLP(10, 128, 1)
# Initialize parameters
mx.eval(model.parameters())
# Access parameters
params = model.parameters()
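model.parameters() is a nested dict of arrays; a small sketch of inspecting it with mlx.utils.tree_flatten:
from mlx.utils import tree_flatten

# Flatten the nested parameter dict to count or inspect individual arrays
num_params = sum(v.size for _, v in tree_flatten(model.parameters()))
print(f"MLP has {num_params} parameters")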
# Linear layer
linear = nn.Linear(input_dim, output_dim)
# Convolutional layer
conv = nn.Conv2d(in_channels, out_channels, kernel_size=3)
# Layer normalization
norm = nn.LayerNorm(dim)
# Dropout (for training)
dropout = nn.Dropout(p=0.5)
# Multi-head attention
attention = nn.MultiHeadAttention(dim, num_heads)
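These layers compose directly; a minimal sketch stacking a few of them with nn.Sequential (dimensions are arbitrary):
encoder = nn.Sequential(
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Dropout(p=0.1),
    nn.Linear(128, 64),
    nn.LayerNorm(64),
)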
import mlx.nn.losses as losses
# Common loss functions
mse_loss = losses.mse_loss(predictions, targets)
bce_loss = losses.binary_cross_entropy(predictions, targets)
ce_loss = losses.cross_entropy(predictions, targets)
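Note that cross_entropy expects unnormalized logits and integer class targets; a minimal sketch (batch size and class count are arbitrary):
logits = mx.random.normal((4, 10))       # 4 samples, 10 classes
class_targets = mx.array([1, 0, 7, 3])
# reduction can be "none" (the default), "mean", or "sum"
ce = losses.cross_entropy(logits, class_targets, reduction="mean")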
import mlx.optimizers as optim
# Create optimizer
optimizer = optim.SGD(learning_rate=0.01)
# Or
optimizer = optim.Adam(learning_rate=0.001, betas=(0.9, 0.999))
# Update model with gradients
optimizer.update(model, gradients)
# Evaluate optimizer state and model parameters
mx.eval(optimizer.state, model.parameters())
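Optimizers also accept a schedule in place of a fixed learning rate; a minimal sketch using cosine decay (hyperparameters are arbitrary):
lr_schedule = optim.cosine_decay(1e-3, decay_steps=1000)
optimizer = optim.AdamW(learning_rate=lr_schedule, weight_decay=0.01)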
import mlx.core as mx
import mlx.nn as nn
import mlx.optimizers as optim
# Create model
model = MyModel()
mx.eval(model.parameters())
# Define loss function
def loss_fn(model, x, y):
    y_pred = model(x)
    return nn.losses.mse_loss(y_pred, y)
# Create gradient function and optimizer
loss_and_grad_fn = nn.value_and_grad(model, loss_fn)
optimizer = optim.Adam(learning_rate=0.001)
# Training loop
for epoch in range(num_epochs):
    for x_batch, y_batch in data_loader:
        # Forward and backward pass
        loss, grads = loss_and_grad_fn(model, x_batch, y_batch)
        # Update model parameters
        optimizer.update(model, grads)
        # Evaluate parameters and optimizer state
        mx.eval(model.parameters(), optimizer.state)
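data_loader above is a placeholder rather than an MLX API; a minimal sketch of one way to batch NumPy arrays (the helper name is hypothetical):
import numpy as np

def batch_iterate(batch_size, X, y):
    # Shuffle indices once per epoch, then yield mini-batches as MLX arrays
    perm = np.random.permutation(y.shape[0])
    for s in range(0, y.shape[0], batch_size):
        ids = perm[s : s + batch_size]
        yield mx.array(X[ids]), mx.array(y[ids])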
# Generate text with a model
mlx_lm.generate --model mistralai/Mistral-7B-Instruct-v0.3 --prompt "hello"
# Stream text generation
mlx_lm.generate --model mistralai/Mistral-7B-Instruct-v0.3 --prompt "hello" --stream
# Set generation parameters
mlx_lm.generate --model <model_name> --prompt "hello" --max-tokens 100 --temp 0.7 --top-p 0.9
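mlx-lm can also be used from Python; a minimal sketch (the model name is an example, and keyword arguments may vary slightly between releases):
from mlx_lm import load, generate

model, tokenizer = load("mlx-community/Llama-3.2-3B-Instruct-4bit")
text = generate(model, tokenizer, prompt="hello", max_tokens=100, verbose=True)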
# Convert Hugging Face model to MLX format
mlx_lm.convert --hf-path mistralai/Mistral-7B-Instruct-v0.3
# Convert and quantize to 4-bit
mlx_lm.convert --hf-path mistralai/Mistral-7B-Instruct-v0.3 -q
# Convert, quantize, and upload to Hugging Face
mlx_lm.convert --hf-path mistralai/Mistral-7B-Instruct-v0.3 -q --upload-repo <username>/<repo-name>
# Start interactive chat with a model
mlx_lm.chat --model mistralai/Mistral-7B-Instruct-v0.3
# Use a local model
mlx_lm.chat --model ./path/to/local/model
# Basic LoRA fine-tuning
mlx_lm.lora --model mistralai/Mistral-7B-v0.1 --train --data ./my_data_folder
# Set specific parameters
mlx_lm.lora \
--model mistralai/Mistral-7B-v0.1 \
--train \
--data ./my_data_folder \
--batch-size 1 \
--num-layers 4 \
--iters 500
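--data expects a folder containing train.jsonl and valid.jsonl, where each line is a JSON object such as {"text": ...}; the sketch below is a hypothetical helper that writes such a folder:
import json
from pathlib import Path

examples = [{"text": "Q: What is MLX?\nA: An array framework for Apple silicon."}]

data_dir = Path("my_data_folder")
data_dir.mkdir(exist_ok=True)
for split in ("train", "valid"):
    with open(data_dir / f"{split}.jsonl", "w") as f:
        for example in examples:
            f.write(json.dumps(example) + "\n")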
# Use quantized model (QLoRA)
mlx_lm.lora --model <quantized_model_path> --train --data ./my_data_folder
# Test a fine-tuned model
mlx_lm.lora \
--model <path_to_model> \
--adapter-path <path_to_adapters> \
--data <path_to_data> \
--test
# Generate with a fine-tuned model
mlx_lm.generate \
--model <path_to_model> \
--adapter-path <path_to_adapters> \
--prompt "<your_prompt>"
# Fuse LoRA adapters with the original model
mlx_lm.fuse \
--model <path_to_model> \
--adapter-path <path_to_adapters> \
--save-path <output_path>
# Fuse and upload to Hugging Face
mlx_lm.fuse \
--model <path_to_model> \
--adapter-path <path_to_adapters> \
--save-path <output_path> \
--upload-name <username>/<repo-name>
# Export to GGUF format
mlx_lm.fuse \
--model <path_to_model> \
--adapter-path <path_to_adapters> \
--export-gguf
# Scan all locally cached models
mlx_lm.manage --scan
# Delete specific models
mlx_lm.manage --delete --pattern <model_name_pattern>
# Run OpenAI-compatible API server
mlx_lm.server
# Interact with the server
curl localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "mlx-community/Llama-3.2-3B-Instruct-4bit",
    "max_completion_tokens": 2000,
    "messages": [{"role": "user", "content": "Hello there"}]
  }'
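Because the server follows the OpenAI chat completions protocol, any HTTP client works; a minimal Python sketch (assumes the requests package is installed):
import requests

response = requests.post(
    "http://localhost:8080/v1/chat/completions",
    json={
        "model": "mlx-community/Llama-3.2-3B-Instruct-4bit",
        "messages": [{"role": "user", "content": "Hello there"}],
        "max_tokens": 200,
    },
)
print(response.json()["choices"][0]["message"]["content"])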
// Add dependency in Package.swift
dependencies: [
    .package(url: "https://github.com/ml-explore/mlx-swift", from: "0.10.0")
]
// Import packages
import MLX
import MLXNN
import MLXOptimizers
import MLXRandom