# Listing metadata: Python, 111 lines, 3.5 KiB
import torch
|
||
import torch.nn as nn
|
||
import torch.optim as optim
|
||
from torchvision import datasets, transforms
|
||
import matplotlib.pyplot as plt
|
||
|
||
# 1. Data preparation (MNIST handwritten-digit recognition as the example)
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        # Normalize pixel values into the [-1, 1] range.
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Both splits live under the same root directory and share one transform.
train_set = datasets.MNIST('data', train=True, download=True, transform=transform)
test_set = datasets.MNIST('data', train=False, download=True, transform=transform)

# Training batches are shuffled; evaluation batches keep dataset order.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=1000)
|
||
|
||
# 2. Neural network model (demonstrates gradient-control techniques)
class Net(nn.Module):
    """Three-layer fully connected classifier: 784 -> 128 -> 64 -> 10."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

        # He (Kaiming) initialization suits the ReLU-activated hidden layers.
        # fc3 keeps nn.Linear's default initialization.
        for hidden in (self.fc1, self.fc2):
            nn.init.kaiming_normal_(hidden.weight, nonlinearity='relu')

    def forward(self, x):
        """Return raw class scores (logits), one row of 10 per flattened input."""
        # Flatten each image into a 784-element row.
        x = x.view(-1, 784)
        for hidden in (self.fc1, self.fc2):
            x = torch.relu(hidden(x))
        # No activation on the output layer: CrossEntropyLoss applies the
        # softmax itself.
        return self.fc3(x)
|
||
|
||
# 3. Training configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Gradient-clipping threshold: upper bound on the total norm of all parameter
# gradients. Clipping has to run after every backward() call, so this is only
# the threshold; the clip_grad_norm_ call itself lives inside train().
MAX_GRAD_NORM = 2.0

# 4. Records collected during training for later visualization
train_losses = []
accuracies = []


def train(epoch, max_grad_norm=MAX_GRAD_NORM):
    """Run one pass over ``train_loader`` and update ``model`` in place.

    Args:
        epoch: Epoch number supplied by the caller. It is not used inside the
            function and is kept so existing ``train(epoch)`` calls still work.
        max_grad_norm: Maximum total gradient norm allowed before each
            optimizer step.

    Side effects:
        Appends the loss of every 100th batch to ``train_losses``.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()

        # Clip between backward() and step(): gradients only exist after
        # backward(), and step() is what consumes them.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=max_grad_norm)
        optimizer.step()

        # Record the training loss every 100 batches.
        if batch_idx % 100 == 0:
            train_losses.append(loss.item())
|
||
|
||
# 5. Evaluation function (includes accuracy computation)
def test():
    """Evaluate ``model`` on ``test_loader``.

    Returns:
        The mean loss per test sample as a float.

    Side effects:
        Puts ``model`` in eval mode and appends the accuracy (in percent) to
        ``accuracies``.
    """
    model.eval()
    num_samples = len(test_loader.dataset)
    loss_sum = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # criterion yields the batch-mean loss; weight it by the batch
            # size so the final figure is a per-sample mean and does not
            # scale with the number of batches.
            loss_sum += criterion(output, target).item() * target.size(0)
            correct += (output.argmax(dim=1) == target).sum().item()

    accuracy = 100. * correct / num_samples
    accuracies.append(accuracy)
    return loss_sum / num_samples
|
||
|
||
# 6. Run training (3 epochs as a demonstration)
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    train(epoch)
    # test() also appends this epoch's accuracy to `accuracies`.
    loss = test()
    print(f'Epoch {epoch}: Test Loss={loss:.4f}, Accuracy={accuracies[-1]:.2f}%')
|
||
|
||
# 7. Visualize the training process
plt.figure(figsize=(12, 5))

# Left panel: training loss, one point per recorded batch.
plt.subplot(1, 2, 1)
plt.plot(train_losses, label='Training Loss')
plt.title("Loss Curve")
plt.xlabel("Recorded step (every 100 batches)")
plt.ylabel("Loss")
plt.legend()  # without this call the label= above is never displayed

# Right panel: test accuracy, one point per evaluation run.
plt.subplot(1, 2, 2)
plt.plot(accuracies, label='Accuracy', color='orange')
plt.title("Accuracy Curve")
plt.xlabel("Evaluation run")
plt.ylabel("Accuracy (%)")
plt.legend()

plt.tight_layout()
plt.show()
|
||
|
||
# 8. Sample predictions
sample_data, sample_label = next(iter(test_loader))
# Make inference mode explicit rather than relying on an earlier test() call.
model.eval()
with torch.no_grad():
    prediction = model(sample_data.to(device)).argmax(dim=1)

# Show the first six test images with their true and predicted labels.
plt.figure(figsize=(10, 6))
for i in range(6):
    plt.subplot(2, 3, i + 1)
    plt.imshow(sample_data[i][0], cmap='gray')
    # .item() on both values so the title reads "True: 7", not "True: tensor(7)".
    plt.title(f"True: {sample_label[i].item()}\nPred: {prediction[i].item()}")
    plt.axis('off')
plt.tight_layout()
plt.show()