U-Net Hands-On Tutorial (2) – Preparing the Model: Overview

import torch
import torch.nn as nn
import torch.nn.functional as F


class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):
    """Downscaling with maxpool then double conv"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)


class Up(nn.Module):
    """Upscaling then double conv"""

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()

        # if bilinear, use the normal convolutions to reduce the number of channels
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        # input is CHW
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        # if you have padding issues, see
        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.conv(x)


class UNet(nn.Module):
    def __init__(self, n_channels=1, n_classes=1, bilinear=False):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        factor = 2 if bilinear else 1
        self.down4 = Down(512, 1024 // factor)
        self.up1 = Up(1024, 512 // factor, bilinear)
        self.up2 = Up(512, 256 // factor, bilinear)
        self.up3 = Up(256, 128 // factor, bilinear)
        self.up4 = Up(128, 64, bilinear)
        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.outc(x)
        return torch.sigmoid(logits)  # Apply sigmoid for binary segmentation


# Define loss function for segmentation
class DiceLoss(nn.Module):
    """Soft Dice loss: 1 - Dice coefficient, with additive smoothing to avoid division by zero."""

    def __init__(self, smooth=1.0):
        super(DiceLoss, self).__init__()
        self.smooth = smooth
        
    def forward(self, pred, target):
        pred_flat = pred.view(-1)
        target_flat = target.view(-1)
        
        intersection = (pred_flat * target_flat).sum()
        
        dice = (2. * intersection + self.smooth) / (pred_flat.sum() + target_flat.sum() + self.smooth)
        
        return 1 - dice


class BCEDiceLoss(nn.Module):
    """Weighted sum of binary cross-entropy and soft Dice loss."""

    def __init__(self, weight_bce=0.5, weight_dice=0.5):
        super(BCEDiceLoss, self).__init__()
        self.weight_bce = weight_bce
        self.weight_dice = weight_dice
        self.bce = nn.BCELoss()  # expects probabilities, matching the sigmoid output of UNet
        self.dice = DiceLoss()
        
    def forward(self, pred, target):
        bce_loss = self.bce(pred, target)
        dice_loss = self.dice(pred, target)
        
        return self.weight_bce * bce_loss + self.weight_dice * dice_loss 

This code implements a U-Net network along with some auxiliary components, such as the loss functions (DiceLoss and BCEDiceLoss). Below I walk through the structure of the code. The explanations here are somewhat brief; the next few posts in this series will cover each part in detail.


1. What the Code Does

U-Net is a deep learning architecture for image segmentation, commonly used in medical imaging. Its defining features are:

  • Contracting path (encoder): progressively reduces spatial resolution while extracting features.
  • Expanding path (decoder): progressively restores spatial resolution and makes pixel-level predictions.
  • Skip connections: pass information from the encoder to the decoder to preserve fine detail.

This code implements:

  1. The U-Net architecture
  2. The U-Net forward pass
  3. Loss functions for optimization (DiceLoss + binary cross-entropy loss)
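
Before breaking the code down, here is a quick smoke test showing how the model is instantiated and called; the 256×256 grayscale input is an arbitrary example size:

import torch

model = UNet(n_channels=1, n_classes=1)  # 1-channel input, binary output
x = torch.randn(1, 1, 256, 256)          # dummy grayscale image batch
with torch.no_grad():
    y = model(x)
print(y.shape)  # torch.Size([1, 1, 256, 256]) -- one probability per pixel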

2. Code Walkthrough

The code consists of several parts; let's take them apart one by one:

1. Imports

import torch
import torch.nn as nn
import torch.nn.functional as F
  • torch: the core PyTorch library.
  • torch.nn: the neural network module, providing layers such as convolutions, activation functions, and pooling.
  • torch.nn.functional: the functional API, providing functions such as F.pad() (padding).

2. The Double Convolution Block (DoubleConv)

class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)

What it does:

  • The module contains two 3×3 convolution layers, each followed by batch normalization (BatchNorm) and a ReLU activation.
  • Convolution layers: extract features.
  • Batch normalization (BatchNorm): speeds up training and mitigates vanishing gradients.
  • ReLU activation: adds non-linearity, increasing the model's expressive power.

Data flow (x is the input feature map):

  1. Pass through the first Conv2d → BatchNorm2d → ReLU.
  2. Pass through the second Conv2d → BatchNorm2d → ReLU.
  3. Return the processed x.
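
Because both convolutions use kernel_size=3 with padding=1, the spatial size is preserved; only the channel count changes. A minimal shape check (the 3-channel input and 128×128 size are arbitrary choices for illustration):

import torch

block = DoubleConv(in_channels=3, out_channels=64)
x = torch.randn(1, 3, 128, 128)  # (batch, channels, height, width)
print(block(x).shape)            # torch.Size([1, 64, 128, 128])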

3. Downsampling (Down)

class Down(nn.Module):
    """Downscaling with maxpool then double conv"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)

What it does:

  • First applies max pooling (MaxPool2d(2)): halves the resolution (H and W divided by 2).
  • Then applies the double convolution (DoubleConv): extracts features.
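
A minimal shape check (sizes chosen arbitrarily for illustration): the pooling halves the spatial dimensions while DoubleConv doubles the channels.

import torch

down = Down(in_channels=64, out_channels=128)
x = torch.randn(1, 64, 128, 128)
print(down(x).shape)  # torch.Size([1, 128, 64, 64])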

4. Upsampling (Up)

class Up(nn.Module):
    """Upscaling then double conv"""

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()

        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])

        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)

What it does:

  • Restores resolution via Upsample (bilinear interpolation) or ConvTranspose2d (transposed convolution).
  • Handles feature maps of mismatched sizes via F.pad().
  • torch.cat() implements the skip connection, concatenating features from the encoder.
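
The padding step matters when a feature map has odd height or width: max pooling floors the size, so after upsampling x1 can be one pixel smaller than the encoder map x2. A standalone sketch of that logic (the tensor sizes are illustrative):

import torch
import torch.nn.functional as F

x2 = torch.randn(1, 64, 57, 57)  # encoder feature map (odd size)
x1 = torch.randn(1, 64, 56, 56)  # upsampled decoder map, one pixel smaller

diffY = x2.size(2) - x1.size(2)  # 1
diffX = x2.size(3) - x1.size(3)  # 1
# F.pad takes (left, right, top, bottom) for the last two dimensions
x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                diffY // 2, diffY - diffY // 2])
x = torch.cat([x2, x1], dim=1)   # concatenate along the channel dimension
print(x.shape)                   # torch.Size([1, 128, 57, 57])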

5. The Output Layer (OutConv)

class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.conv(x)

What it does:

  • Uses a 1×1 convolution to map the channel count to n_classes, producing the segmentation output.
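
A 1×1 convolution acts as a per-pixel linear layer over the channels: it changes the channel count without touching the spatial dimensions. A minimal check (sizes are illustrative):

import torch

outc = OutConv(in_channels=64, out_channels=1)
x = torch.randn(1, 64, 256, 256)
print(outc(x).shape)  # torch.Size([1, 1, 256, 256])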

6. The U-Net Structure (UNet)

class UNet(nn.Module):
    def __init__(self, n_channels=1, n_classes=1, bilinear=False):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        factor = 2 if bilinear else 1
        self.down4 = Down(512, 1024 // factor)
        self.up1 = Up(1024, 512 // factor, bilinear)
        self.up2 = Up(512, 256 // factor, bilinear)
        self.up3 = Up(256, 128 // factor, bilinear)
        self.up4 = Up(128, 64, bilinear)
        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.outc(x)
        return torch.sigmoid(logits)  # Apply sigmoid for binary segmentation

U-Net data flow:

  1. Progressively downsample (Down).
  2. Progressively upsample (Up), using skip connections.
  3. Output the prediction via OutConv.
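
Tracing tensor shapes through the forward pass makes the encoder/decoder symmetry explicit. A sketch mirroring UNet.forward step by step, using the defaults (n_channels=1, bilinear=False); the 256×256 input is an arbitrary choice, and any size divisible by 16 passes through without padding:

import torch

model = UNet(n_channels=1, n_classes=1, bilinear=False)
x = torch.randn(1, 1, 256, 256)
with torch.no_grad():
    x1 = model.inc(x)      # (1, 64, 256, 256)
    x2 = model.down1(x1)   # (1, 128, 128, 128)
    x3 = model.down2(x2)   # (1, 256, 64, 64)
    x4 = model.down3(x3)   # (1, 512, 32, 32)
    x5 = model.down4(x4)   # (1, 1024, 16, 16) -- the bottleneck
    y = model.up1(x5, x4)  # (1, 512, 32, 32)
    y = model.up2(y, x3)   # (1, 256, 64, 64)
    y = model.up3(y, x2)   # (1, 128, 128, 128)
    y = model.up4(y, x1)   # (1, 64, 256, 256)
    out = torch.sigmoid(model.outc(y))
print(out.shape)           # torch.Size([1, 1, 256, 256])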

7. Dice Loss

class DiceLoss(nn.Module):
    """Soft Dice loss: 1 - Dice coefficient, with additive smoothing to avoid division by zero."""

    def __init__(self, smooth=1.0):
        super(DiceLoss, self).__init__()
        self.smooth = smooth

    def forward(self, pred, target):
        pred_flat = pred.view(-1)
        target_flat = target.view(-1)
        intersection = (pred_flat * target_flat).sum()
        dice = (2. * intersection + self.smooth) / (pred_flat.sum() + target_flat.sum() + self.smooth)
        return 1 - dice

What it does:

  • Computes the Dice coefficient (a measure of overlap between the prediction and the ground truth); the smooth term avoids division by zero.
  • The coefficient lies in [0, 1]; the closer to 1, the better the prediction. The loss returned is 1 - dice, so minimizing the loss maximizes the overlap.
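
A tiny worked example (the four-element tensors are made up for illustration):

import torch

pred = torch.tensor([0.9, 0.8, 0.1, 0.2])    # predicted probabilities, flattened
target = torch.tensor([1.0, 1.0, 0.0, 0.0])  # ground-truth mask, flattened

loss = DiceLoss(smooth=1.0)(pred, target)
# intersection = 0.9 + 0.8 = 1.7
# dice = (2 * 1.7 + 1) / (2.0 + 2.0 + 1) = 4.4 / 5.0 = 0.88
print(loss)  # tensor(0.1200)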

Summary

  • DoubleConv: the double convolution block
  • Down: downsampling
  • Up: upsampling
  • OutConv: the output layer
  • UNet: the complete U-Net
  • DiceLoss: the loss function used for optimization

This code is a complete implementation of the U-Net architecture plus loss functions, ready for medical image segmentation and other segmentation tasks! 🚀
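
The listing also defines BCEDiceLoss, a weighted combination of binary cross-entropy and Dice loss, which the training post will cover in detail. As a preview, here is a minimal, hypothetical training step; the optimizer settings, batch size, and tensor shapes are illustrative, and the real data loading comes later in the series:

import torch

model = UNet(n_channels=1, n_classes=1)
criterion = BCEDiceLoss(weight_bce=0.5, weight_dice=0.5)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

images = torch.randn(2, 1, 256, 256)                   # dummy image batch
masks = torch.randint(0, 2, (2, 1, 256, 256)).float()  # dummy binary masks

preds = model(images)           # probabilities in (0, 1) thanks to the sigmoid
loss = criterion(preds, masks)  # nn.BCELoss requires probabilities, not logits
loss.backward()
optimizer.step()
optimizer.zero_grad()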