import torch
import torch.nn as nn
import torch.nn.functional as F
class DoubleConv(nn.Module):
"""(convolution => [BN] => ReLU) * 2"""
def __init__(self, in_channels, out_channels, mid_channels=None):
super().__init__()
if not mid_channels:
mid_channels = out_channels
self.double_conv = nn.Sequential(
nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(mid_channels),
nn.ReLU(inplace=True),
nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True)
)
def forward(self, x):
return self.double_conv(x)
class Down(nn.Module):
"""Downscaling with maxpool then double conv"""
def __init__(self, in_channels, out_channels):
super().__init__()
self.maxpool_conv = nn.Sequential(
nn.MaxPool2d(2),
DoubleConv(in_channels, out_channels)
)
def forward(self, x):
return self.maxpool_conv(x)
class Up(nn.Module):
"""Upscaling then double conv"""
def __init__(self, in_channels, out_channels, bilinear=True):
super().__init__()
# if bilinear, use the normal convolutions to reduce the number of channels
if bilinear:
self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
else:
self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
self.conv = DoubleConv(in_channels, out_channels)
def forward(self, x1, x2):
x1 = self.up(x1)
        # input is NCHW; pad x1 so its spatial size matches x2's
diffY = x2.size()[2] - x1.size()[2]
diffX = x2.size()[3] - x1.size()[3]
x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
diffY // 2, diffY - diffY // 2])
# if you have padding issues, see
# https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
# https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
x = torch.cat([x2, x1], dim=1)
return self.conv(x)
class OutConv(nn.Module):
def __init__(self, in_channels, out_channels):
super(OutConv, self).__init__()
self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)
def forward(self, x):
return self.conv(x)
class UNet(nn.Module):
def __init__(self, n_channels=1, n_classes=1, bilinear=False):
super(UNet, self).__init__()
self.n_channels = n_channels
self.n_classes = n_classes
self.bilinear = bilinear
self.inc = DoubleConv(n_channels, 64)
self.down1 = Down(64, 128)
self.down2 = Down(128, 256)
self.down3 = Down(256, 512)
factor = 2 if bilinear else 1
self.down4 = Down(512, 1024 // factor)
self.up1 = Up(1024, 512 // factor, bilinear)
self.up2 = Up(512, 256 // factor, bilinear)
self.up3 = Up(256, 128 // factor, bilinear)
self.up4 = Up(128, 64, bilinear)
self.outc = OutConv(64, n_classes)
def forward(self, x):
x1 = self.inc(x)
x2 = self.down1(x1)
x3 = self.down2(x2)
x4 = self.down3(x3)
x5 = self.down4(x4)
x = self.up1(x5, x4)
x = self.up2(x, x3)
x = self.up3(x, x2)
x = self.up4(x, x1)
logits = self.outc(x)
return torch.sigmoid(logits) # Apply sigmoid for binary segmentation
# Define loss function for segmentation
class DiceLoss(nn.Module):
def __init__(self, smooth=1.0):
super(DiceLoss, self).__init__()
self.smooth = smooth
def forward(self, pred, target):
pred_flat = pred.view(-1)
target_flat = target.view(-1)
intersection = (pred_flat * target_flat).sum()
dice = (2. * intersection + self.smooth) / (pred_flat.sum() + target_flat.sum() + self.smooth)
return 1 - dice
class BCEDiceLoss(nn.Module):
def __init__(self, weight_bce=0.5, weight_dice=0.5):
super(BCEDiceLoss, self).__init__()
self.weight_bce = weight_bce
self.weight_dice = weight_dice
        self.bce = nn.BCELoss()  # expects probabilities; UNet.forward already applies sigmoid
self.dice = DiceLoss()
def forward(self, pred, target):
bce_loss = self.bce(pred, target)
dice_loss = self.dice(pred, target)
return self.weight_bce * bce_loss + self.weight_dice * dice_loss
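As a quick sanity check before the walkthrough, the model and the combined loss can be exercised end to end on a dummy batch (a minimal sketch; the batch size of 2 and the 256×256 input are arbitrary choices):

# Smoke test: run a dummy batch through the model and the combined loss.
model = UNet(n_channels=1, n_classes=1, bilinear=False)
images = torch.randn(2, 1, 256, 256)                    # two grayscale images
masks = torch.randint(0, 2, (2, 1, 256, 256)).float()   # binary ground-truth masks
preds = model(images)                                   # probabilities in [0, 1]
print(preds.shape)                                      # torch.Size([2, 1, 256, 256])
print(BCEDiceLoss()(preds, masks).item())               # scalar loss value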
This code implements a U-Net network together with some supporting components, such as the loss functions (DiceLoss and BCEDiceLoss). Below is a walkthrough of the code's structure. The explanations here are deliberately brief; later posts will cover each part in detail.
1. Main functionality of the code
U-Net is a deep-learning architecture for image segmentation, widely used on medical images. Its defining features are:
- Contracting path (encoder): progressively reduces spatial resolution while extracting features.
- Expanding path (decoder): progressively restores spatial resolution and produces pixel-level predictions.
- Skip connections: carry information from the encoder to the decoder to preserve fine detail.
This code implements:
- the construction of the U-Net,
- its forward pass, and
- the loss functions used for optimization (DiceLoss + binary cross-entropy loss).
2. Code walkthrough
The code breaks into several parts; let's go through them one by one.
1. Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
- `torch` is the core PyTorch library.
- `torch.nn` is the neural-network module, providing the various layers (convolutions, activation functions, pooling, and so on).
- `torch.nn.functional` provides the functional API, e.g. `F.pad()` for padding.
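Since the `Up` block below relies on `F.pad()`, note its argument order: for the last two dimensions of a 4D tensor the list reads `[left, right, top, bottom]`, i.e. the width padding comes first. A quick illustration:

t = torch.zeros(1, 1, 4, 4)
padded = F.pad(t, [1, 2, 0, 3])  # 1 left, 2 right, 0 top, 3 bottom
print(padded.shape)              # torch.Size([1, 1, 7, 7]): H = 4+0+3, W = 4+1+2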
2. The double convolution block: DoubleConv
class DoubleConv(nn.Module):
"""(convolution => [BN] => ReLU) * 2"""
def __init__(self, in_channels, out_channels, mid_channels=None):
super().__init__()
if not mid_channels:
mid_channels = out_channels
self.double_conv = nn.Sequential(
nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(mid_channels),
nn.ReLU(inplace=True),
nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True)
)
def forward(self, x):
return self.double_conv(x)
Purpose:
- This module applies two 3×3 convolutions, each followed by batch normalization (BatchNorm) and a ReLU activation.
- The convolutions extract features.
- Batch normalization speeds up training and mitigates vanishing gradients.
- ReLU adds non-linearity, increasing the model's expressive power.
Data flow (x is the input feature map):
- First pass: `Conv2d` → `BatchNorm2d` → `ReLU`
- Second pass: `Conv2d` → `BatchNorm2d` → `ReLU`
- Return the processed `x`
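Because both convolutions use `kernel_size=3` with `padding=1`, the spatial size is preserved and only the channel count changes, e.g.:

block = DoubleConv(3, 64)        # 3 input channels -> 64 output channels
x = torch.randn(1, 3, 128, 128)
print(block(x).shape)            # torch.Size([1, 64, 128, 128]) - H, W unchanged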
3. Downsampling (Down)
class Down(nn.Module):
"""Downscaling with maxpool then double conv"""
def __init__(self, in_channels, out_channels):
super().__init__()
self.maxpool_conv = nn.Sequential(
nn.MaxPool2d(2),
DoubleConv(in_channels, out_channels)
)
def forward(self, x):
return self.maxpool_conv(x)
Purpose:
- First apply max pooling (`MaxPool2d(2)`): halves the resolution (H and W are divided by 2).
- Then apply the double convolution (`DoubleConv`): extracts features.
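For example, a single Down step halves H and W while doubling the channels:

down = Down(64, 128)
x = torch.randn(1, 64, 128, 128)
print(down(x).shape)             # torch.Size([1, 128, 64, 64])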
4. Upsampling (Up)
class Up(nn.Module):
"""Upscaling then double conv"""
def __init__(self, in_channels, out_channels, bilinear=True):
super().__init__()
if bilinear:
self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
else:
self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
self.conv = DoubleConv(in_channels, out_channels)
def forward(self, x1, x2):
x1 = self.up(x1)
diffY = x2.size()[2] - x1.size()[2]
diffX = x2.size()[3] - x1.size()[3]
x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
diffY // 2, diffY - diffY // 2])
x = torch.cat([x2, x1], dim=1)
return self.conv(x)
Purpose:
- `Upsample` (bilinear interpolation) or `ConvTranspose2d` (transposed convolution) restores the resolution.
- `F.pad()` reconciles feature maps of mismatched sizes before they are concatenated.
- `torch.cat()` implements the skip connection, concatenating the encoder features with the upsampled ones.
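The padding step matters when an encoder feature map has odd spatial size: `MaxPool2d(2)` floors the division, so the upsampled tensor can come back one pixel short of its skip connection. A sketch with the transposed-convolution variant (the 33×33 size is chosen to trigger the mismatch):

up = Up(1024, 512, bilinear=False)
x1 = torch.randn(1, 1024, 16, 16)  # deeper features (a 33x33 map pooled down to 16x16)
x2 = torch.randn(1, 512, 33, 33)   # skip connection with odd spatial size
print(up(x1, x2).shape)            # torch.Size([1, 512, 33, 33]) - x1 padded to match x2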
5. The output layer: OutConv
class OutConv(nn.Module):
def __init__(self, in_channels, out_channels):
super(OutConv, self).__init__()
self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)
def forward(self, x):
return self.conv(x)
Purpose:
- A 1×1 convolution maps the channel count to `n_classes`, producing the segmentation output.
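For instance, mapping 64 feature channels down to a single-class output:

head = OutConv(64, 1)
x = torch.randn(1, 64, 256, 256)
print(head(x).shape)             # torch.Size([1, 1, 256, 256]) - one map per class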
6. The U-Net architecture
class UNet(nn.Module):
def __init__(self, n_channels=1, n_classes=1, bilinear=False):
super(UNet, self).__init__()
self.n_channels = n_channels
self.n_classes = n_classes
self.bilinear = bilinear
self.inc = DoubleConv(n_channels, 64)
self.down1 = Down(64, 128)
self.down2 = Down(128, 256)
self.down3 = Down(256, 512)
factor = 2 if bilinear else 1
self.down4 = Down(512, 1024 // factor)
self.up1 = Up(1024, 512 // factor, bilinear)
self.up2 = Up(512, 256 // factor, bilinear)
self.up3 = Up(256, 128 // factor, bilinear)
self.up4 = Up(128, 64, bilinear)
self.outc = OutConv(64, n_classes)
def forward(self, x):
x1 = self.inc(x)
x2 = self.down1(x1)
x3 = self.down2(x2)
x4 = self.down3(x3)
x5 = self.down4(x4)
x = self.up1(x5, x4)
x = self.up2(x, x3)
x = self.up3(x, x2)
x = self.up4(x, x1)
logits = self.outc(x)
return torch.sigmoid(logits) # Apply sigmoid for binary segmentation
U-Net data flow:
- Progressive downsampling (Down).
- Progressive upsampling (Up), with a skip connection at every stage.
- Final prediction through `OutConv`.
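Tracing the encoder shapes for a 1×256×256 input makes the resolution/channel trade-off concrete (a sketch that calls the submodules directly, with `bilinear=False` so every stage doubles the channels):

net = UNet(n_channels=1, n_classes=1, bilinear=False)
x = torch.randn(1, 1, 256, 256)
x1 = net.inc(x)                  # [1,   64, 256, 256]
x2 = net.down1(x1)               # [1,  128, 128, 128]
x3 = net.down2(x2)               # [1,  256,  64,  64]
x4 = net.down3(x3)               # [1,  512,  32,  32]
x5 = net.down4(x4)               # [1, 1024,  16,  16]
print(net.up1(x5, x4).shape)     # torch.Size([1, 512, 32, 32]) - decoder mirrors encoder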
7. Dice Loss
class DiceLoss(nn.Module):
    def __init__(self, smooth=1.0):
        super(DiceLoss, self).__init__()
        self.smooth = smooth
    def forward(self, pred, target):
        pred_flat = pred.view(-1)
        target_flat = target.view(-1)
        intersection = (pred_flat * target_flat).sum()
        dice = (2. * intersection + self.smooth) / (pred_flat.sum() + target_flat.sum() + self.smooth)
        return 1 - dice
Purpose:
- Computes the Dice coefficient, which measures the overlap between the prediction and the ground truth.
- The coefficient lies in `[0, 1]`; the closer to 1, the better the prediction. The loss returns `1 - dice`, so lower is better.
- The `smooth` term (1.0 by default) avoids division by zero and stabilizes training when both prediction and target are nearly empty.
`BCEDiceLoss` simply combines this Dice loss with binary cross-entropy (`nn.BCELoss`), weighting the two terms equally by default.
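A tiny worked example with `smooth = 1`: for `pred = [1, 1, 0, 0]` and `target = [1, 0, 0, 1]`, the intersection is 1, so dice = (2·1 + 1) / (2 + 2 + 1) = 0.6 and the loss is 0.4:

pred = torch.tensor([1., 1., 0., 0.])
target = torch.tensor([1., 0., 0., 1.])
print(DiceLoss()(pred, target).item())  # 0.4, i.e. 1 - 0.6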
Summary
- `DoubleConv`: the double convolution block
- `Down`: downsampling
- `Up`: upsampling
- `OutConv`: the output layer
- `UNet`: the complete U-Net
- `DiceLoss` / `BCEDiceLoss`: loss functions used for optimization
Together, this is a complete implementation of the U-Net architecture plus its loss functions, ready for medical-image or other segmentation tasks!
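To put the pieces together, a minimal training-step sketch might look like the following (the `dataloader`, the Adam optimizer, and the learning rate are placeholder choices, not part of the code above):

model = UNet(n_channels=1, n_classes=1)
criterion = BCEDiceLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
for images, masks in dataloader:  # hypothetical DataLoader yielding (image, mask) pairs
    preds = model(images)         # probabilities, thanks to the sigmoid in forward()
    loss = criterion(preds, masks)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()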