import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

# 1. Data preparation (MNIST handwritten-digit recognition)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),  # normalize pixel values to [-1, 1]
])
train_set = datasets.MNIST('data', download=True, train=True, transform=transform)
test_set = datasets.MNIST('data', download=True, train=False, transform=transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=1000)


# 2. Neural-network model (demonstrates gradient-control techniques)
class Net(nn.Module):
    """Simple 3-layer MLP classifier for 28x28 MNIST images (784 -> 128 -> 64 -> 10)."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)
        # He (Kaiming) initialization suits the ReLU activations used in forward()
        nn.init.kaiming_normal_(self.fc1.weight, nonlinearity='relu')
        nn.init.kaiming_normal_(self.fc2.weight, nonlinearity='relu')

    def forward(self, x):
        x = x.view(-1, 784)  # flatten each image to a 784-vector
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # No activation on the output layer: CrossEntropyLoss applies log-softmax itself
        return self.fc3(x)


# 3. Training configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# 4. Records for training-progress visualization
train_losses = []
accuracies = []


def train(epoch):
    """Run one training epoch over train_loader, logging loss every 100 batches.

    Gradients are clipped to a max L2 norm of 2.0 on every step.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        # BUG FIX: gradient clipping must run after backward() on every step.
        # The original called clip_grad_norm_ once at setup time, before any
        # gradients existed, which made it a no-op.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=2.0)
        optimizer.step()
        # Record training loss periodically
        if batch_idx % 100 == 0:
            train_losses.append(loss.item())
# 5.
# 5. Test function (with accuracy computation)
def test():
    """Evaluate the model on the test set.

    Appends the accuracy (percent) to the global ``accuracies`` list and
    returns the mean test loss averaged over batches.
    """
    model.eval()
    test_loss = 0.0
    correct = 0
    with torch.no_grad():  # no gradients needed during evaluation
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    # BUG FIX: the original returned the *sum* of per-batch mean losses, which
    # scales with the number of batches; average so the value is comparable
    # regardless of the test-loader batch size.
    test_loss /= len(test_loader)
    accuracy = 100. * correct / len(test_loader.dataset)
    accuracies.append(accuracy)
    return test_loss


# 6. Run training (3 epochs for demonstration)
for epoch in range(1, 4):
    train(epoch)
    loss = test()
    print(f'Epoch {epoch}: Test Loss={loss:.4f}, Accuracy={accuracies[-1]:.2f}%')

# 7. Visualize the training process
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(train_losses, label='Training Loss')
plt.title("Loss Curve")
plt.subplot(1, 2, 2)
plt.plot(accuracies, label='Accuracy', color='orange')
plt.title("Accuracy Curve")
plt.show()

# 8. Sample-prediction showcase
sample_data, sample_label = next(iter(test_loader))
with torch.no_grad():
    prediction = model(sample_data.to(device)).argmax(dim=1)

# Show predicted vs. true labels for six test images
plt.figure(figsize=(10, 6))
for i in range(6):
    plt.subplot(2, 3, i + 1)
    plt.imshow(sample_data[i][0], cmap='gray')
    plt.title(f"True: {sample_label[i]}\nPred: {prediction[i].item()}")
    plt.axis('off')
plt.tight_layout()
plt.show()