import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
# Download training data from open datasets.
training_data = datasets.FashionMNIST(
root="data",
train=True,
download=True,
transform=ToTensor(),
)
# Download test data from open datasets.
test_data = datasets.FashionMNIST(
root="data",
train=False,
download=True,
transform=ToTensor(),
)
batch_size = 64
# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)
for X, y in test_dataloader:
print(f"Shape of X [N, C, H, W]: {X.shape}")
print(f"Shape of y: {y.shape} {y.dtype}")
break
# Newer PyTorch releases expose a device-agnostic accelerator API:
# device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
# Define model
class NeuralNetwork(nn.Module):
def __init__(self):
super().__init__()
self.flatten = nn.Flatten()
self.linear_relu_stack = nn.Sequential(
nn.Linear(28*28, 512),
nn.ReLU(),
nn.Linear(512, 512),
nn.ReLU(),
nn.Linear(512, 10)
)
def forward(self, x):
x = self.flatten(x)
logits = self.linear_relu_stack(x)
return logits
model = NeuralNetwork().to(device)
print(model)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
def train(dataloader, model, loss_fn, optimizer):
size = len(dataloader.dataset)
model.train()
for batch, (X, y) in enumerate(dataloader):
X, y = X.to(device), y.to(device)
# Compute prediction error
pred = model(X)
loss = loss_fn(pred, y)
# Backpropagation
loss.backward()
optimizer.step()
optimizer.zero_grad()
if batch % 100 == 0:
loss, current = loss.item(), (batch + 1) * len(X)
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def test(dataloader, model, loss_fn):
size = len(dataloader.dataset)
num_batches = len(dataloader)
model.eval()
test_loss, correct = 0, 0
with torch.no_grad():
for X, y in dataloader:
X, y = X.to(device), y.to(device)
pred = model(X)
test_loss += loss_fn(pred, y).item()
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_loss /= num_batches
correct /= size
print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
epochs = 5
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train(train_dataloader, model, loss_fn, optimizer)
test(test_dataloader, model, loss_fn)
print("Done!")
torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")
model1 = NeuralNetwork().to(device)
model1.load_state_dict(torch.load("model.pth"))
# In newer PyTorch releases, the safer weights_only loading is preferred:
# model1.load_state_dict(torch.load("model.pth", weights_only=True))
classes = [
"T-shirt/top",
"Trouser",
"Pullover",
"Dress",
"Coat",
"Sandal",
"Shirt",
"Sneaker",
"Bag",
"Ankle boot",
]
model1.eval()
x, y = test_data[0][0], test_data[0][1]
with torch.no_grad():
x = x.to(device)
pred = model1(x)
predicted, actual = classes[pred[0].argmax(0)], classes[y]
print(f'Predicted: "{predicted}", Actual: "{actual}"')
This code is a complete image-classification example built with the PyTorch framework (written against torch 1.12; the commented-out lines show the syntax for newer versions). It covers data loading, model definition, training, testing, and model saving/loading. Each part is explained in detail below.
1. Import the required libraries
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
torch is the core PyTorch library. nn contains the neural-network building blocks (layers, loss functions, and so on). DataLoader is a utility for loading data, especially useful for loading data in batches when the dataset is large. datasets provides common standard datasets (e.g. MNIST, FashionMNIST). ToTensor is a transform that converts a PIL image or NumPy array into a PyTorch Tensor.
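As a quick illustration of what ToTensor actually does (a standalone sketch added for clarity; img and tensor are my own names, not part of the original code):
from PIL import Image
import numpy as np
from torchvision.transforms import ToTensor
# A dummy 28x28 grayscale image with pixel values in [0, 255].
img = Image.fromarray(np.random.randint(0, 256, (28, 28), dtype=np.uint8))
tensor = ToTensor()(img)
print(tensor.shape)                # torch.Size([1, 28, 28]) -- a channel dim is added
print(tensor.min(), tensor.max())  # pixel values are rescaled into [0.0, 1.0]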
2. Download the training and test data
# Download training data from open datasets.
training_data = datasets.FashionMNIST(
root="data",
train=True,
download=True,
transform=ToTensor(),
)
# Download test data from open datasets.
test_data = datasets.FashionMNIST(
root="data",
train=False,
download=True,
transform=ToTensor(),
)
- datasets.FashionMNIST downloads the FashionMNIST dataset; train=True selects the training split and train=False the test split. transform=ToTensor() converts each image into a PyTorch Tensor.
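To sanity-check what a single sample looks like (a small inspection I am adding; not part of the original tutorial):
img, label = training_data[0]
print(img.shape)  # torch.Size([1, 28, 28]) -- one grayscale channel
print(label)      # an integer class index in 0-9
print(len(training_data), len(test_data))  # 60000 10000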
3. Create the DataLoaders
batch_size = 64
# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)
DataLoader loads data from a dataset in batches; batch_size=64 means 64 samples are loaded at a time. train_dataloader and test_dataloader are the loaders for the training and test sets, respectively.
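Note that these loaders iterate in a fixed order; for training it is common to shuffle. A possible variant (shuffle and num_workers are standard DataLoader arguments, but enabling them here is my suggestion, not the original code):
train_dataloader = DataLoader(
    training_data,
    batch_size=batch_size,
    shuffle=True,   # reshuffle the training samples every epoch
    num_workers=2,  # prepare batches in background worker processes
)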
4. Inspect the data shapes
for X, y in test_dataloader:
print(f"Shape of X [N, C, H, W]: {X.shape}")
print(f"Shape of y: {y.shape} {y.dtype}")
break
- The loop pulls a single batch from the test set: X is the image data and y the labels. X.shape shows the shape of the images; [N, C, H, W] are the batch size, number of channels (1 for grayscale, 3 for RGB), height, and width.
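With batch_size=64 and 28x28 grayscale images, the loop should print:
Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64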
5. Select the device (CPU or GPU)
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
- The device type is chosen based on whether a GPU is available: if CUDA is available, the GPU (cuda) is used; otherwise the CPU (cpu).
6. Define the neural network model
class NeuralNetwork(nn.Module):
def __init__(self):
super().__init__()
self.flatten = nn.Flatten()
self.linear_relu_stack = nn.Sequential(
nn.Linear(28*28, 512),
nn.ReLU(),
nn.Linear(512, 512),
nn.ReLU(),
nn.Linear(512, 10)
)
def forward(self, x):
x = self.flatten(x)
logits = self.linear_relu_stack(x)
return logits
NeuralNetwork is the model class, inheriting from nn.Module. The __init__ method defines the network structure: Flatten() flattens each 28x28 input image into a 1D vector. nn.Linear(28*28, 512) maps the flattened image to a 512-dimensional vector, followed by a ReLU activation. nn.Linear(512, 512) and nn.ReLU() apply another linear transformation and activation. Finally, nn.Linear(512, 10) maps the 512-dimensional vector to the 10 classes (FashionMNIST has 10 categories). The forward() method defines the forward pass.
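To get a feel for the model's size, you can count its parameters (a one-off check; net and total_params are my names):
net = NeuralNetwork()
total_params = sum(p.numel() for p in net.parameters())
print(total_params)  # 669706 = (784*512 + 512) + (512*512 + 512) + (512*10 + 10)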
7. Instantiate the model and move it to the device
model = NeuralNetwork().to(device)
print(model)
- Create a NeuralNetwork instance and move it to the selected device (CPU or GPU).
8. Define the loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
loss_fn is the cross-entropy loss, suitable for multi-class classification. optimizer uses stochastic gradient descent (SGD) with a learning rate of 1e-3.
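One detail worth noting: nn.CrossEntropyLoss expects raw logits and applies log-softmax internally, which is why the model returns logits without a final softmax layer. A tiny illustration with made-up numbers:
logits = torch.tensor([[2.0, 0.5, -1.0]])     # raw scores for 3 classes
target = torch.tensor([0])                    # the true class index
print(nn.CrossEntropyLoss()(logits, target))  # small loss, since class 0 already scores highest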
9. The training function
def train(dataloader, model, loss_fn, optimizer):
size = len(dataloader.dataset)
model.train()
for batch, (X, y) in enumerate(dataloader):
X, y = X.to(device), y.to(device)
# Compute prediction error
pred = model(X)
loss = loss_fn(pred, y)
# Backpropagation
loss.backward()
optimizer.step()
optimizer.zero_grad()
if batch % 100 == 0:
loss, current = loss.item(), (batch + 1) * len(X)
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
The train() function runs one training pass: dataloader supplies the data, with X the inputs and y the labels. model(X) computes the predictions, and loss_fn(pred, y) measures the loss between predictions and true labels. loss.backward() computes the gradients, optimizer.step() updates the model parameters, and optimizer.zero_grad() clears the gradients for the next step. The current loss is printed every 100 batches.
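The order used here (backward, step, then zero_grad) is equivalent to the more common zero-first pattern; the key point is that gradients must be cleared before the next backward() call, because PyTorch accumulates them. The same loop body rewritten in the zero-first style (a sketch, not the original code):
optimizer.zero_grad()        # clear gradients left over from the previous step
loss = loss_fn(model(X), y)
loss.backward()              # compute fresh gradients
optimizer.step()             # apply the parameter update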
10. The test function
def test(dataloader, model, loss_fn):
size = len(dataloader.dataset)
num_batches = len(dataloader)
model.eval()
test_loss, correct = 0, 0
with torch.no_grad():
for X, y in dataloader:
X, y = X.to(device), y.to(device)
pred = model(X)
test_loss += loss_fn(pred, y).item()
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_loss /= num_batches
correct /= size
print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
The test() function evaluates model performance: model.eval() puts the model in evaluation mode, disabling training-only behavior such as dropout. with torch.no_grad() turns off gradient computation. The function then computes the average loss and accuracy over the test set.
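The accuracy line is the least obvious step: pred.argmax(1) picks the highest-scoring class per sample, and the boolean comparison with y is summed. With made-up numbers:
pred = torch.tensor([[0.1, 2.0], [3.0, -1.0]])        # logits for 2 samples, 2 classes
y = torch.tensor([1, 1])
print(pred.argmax(1))                                 # tensor([1, 0])
print((pred.argmax(1) == y).type(torch.float).sum())  # tensor(1.) -- one of two correct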
11. Train and test
epochs = 5
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train(train_dataloader, model, loss_fn, optimizer)
test(test_dataloader, model, loss_fn)
print("Done!")
- Run 5 epochs of training and testing, printing the training and test results for each epoch.
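For longer runs it is common to checkpoint after every epoch instead of only at the end; a minimal sketch (the filename pattern is my choice, not part of the original):
for t in range(epochs):
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
    torch.save(model.state_dict(), f"model_epoch{t+1}.pth")  # keep one snapshot per epoch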
12. Save and load the model
torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")
model1 = NeuralNetwork().to(device)
model1.load_state_dict(torch.load("model.pth"))
torch.save(model.state_dict(), "model.pth") saves the model's parameters (weights). model1.load_state_dict(torch.load("model.pth")) loads them back into a fresh model instance.
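Note that state_dict() stores only the weights. If you want to resume training rather than just run inference, you would typically save the optimizer state as well; a minimal sketch of that pattern (the dict keys are my choice):
checkpoint = {
    "model": model.state_dict(),
    "optimizer": optimizer.state_dict(),
    "epoch": epochs,
}
torch.save(checkpoint, "checkpoint.pth")
ckpt = torch.load("checkpoint.pth")
model1.load_state_dict(ckpt["model"])
optimizer.load_state_dict(ckpt["optimizer"])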
13. Make a prediction
model1.eval()
x, y = test_data[0][0], test_data[0][1]
with torch.no_grad():
x = x.to(device)
pred = model1(x)
predicted, actual = classes[pred[0].argmax(0)], classes[y]
print(f'Predicted: "{predicted}", Actual: "{actual}"')
- Take one test sample, run it through the model, and print the predicted class alongside the actual class.
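Since the model outputs raw logits, you can also convert them to class probabilities with a softmax before reading off the prediction (a small extension; probs, conf, and idx are my names):
with torch.no_grad():
    logits = model1(x.to(device))
    probs = logits.softmax(dim=1)    # turn logits into probabilities
    conf, idx = probs[0].max(dim=0)
    print(f'{classes[idx]} ({conf.item():.1%} confidence)')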
Summary:
This code demonstrates a complete machine-learning workflow: data preprocessing, model definition, training, evaluation, and model saving/loading. In the end, it trains and evaluates a simple neural network on the FashionMNIST dataset.