# Auto detect text files and perform LF normalization
* text=auto
## 代码运行
python main.py --conf ./utils/conf.json
- 联邦训练配置:一共10台客户端设备(no\_models=10),每一轮任意挑选其中的5台参与训练(k=5), 每一次本地训练迭代次数为3次(local\_epochs=3),全局迭代次数为20次(global\_epochs=20)。
- 集中式训练配置:我们不需要单独编写集中式训练代码,只需要修改联邦学习配置即可使其等价于集中式训练。具体来说,我们将客户端设备no\_models和每一轮挑选的参与训练设备数k都设为1即可。这样只有1台设备参与的联邦训练等价于集中式训练。其余参数配置信息与联邦学习训练一致。图中我们将局部迭代次数分别设置了1,2,3来进行比较。
'''Convolutional Block Attention Module (CBAM).'''
import torch
import torch.nn as nn
from torch.nn.modules import pooling
from torch.nn.modules.flatten import Flatten
class Channel_Attention(nn.Module):
    '''Channel attention branch of CBAM.

    The input is pooled over its two trailing (spatial) dimensions with every
    configured pooling operator, each pooled descriptor is passed through one
    shared two-layer MLP, the MLP outputs are summed, and the sigmoid of that
    sum rescales the input channel by channel.
    '''

    _SUPPORTED_POOLS = ('avg', 'max')

    def __init__(self, channel_in, reduction_ratio=16, pool_types=('avg', 'max')):
        '''Param init and architecture building.

        Args:
            channel_in: Number of channels of the input feature map.
            reduction_ratio: Divisor applied to ``channel_in`` to size the
                hidden layer of the shared MLP.
            pool_types: Iterable of pooling names. 'avg' and 'max' are
                recognised; any other entry is ignored.

        Raises:
            ValueError: If ``pool_types`` contains neither 'avg' nor 'max'
                (the forward pass would have nothing to stack).
        '''
        super(Channel_Attention, self).__init__()
        self.pool_types = tuple(pool_types)
        if not any(name in self._SUPPORTED_POOLS for name in self.pool_types):
            raise ValueError(
                "pool_types must contain 'avg' and/or 'max', got %r" % (self.pool_types,))

        hidden = channel_in // reduction_ratio
        self.shared_mlp = nn.Sequential(
            # Pooled descriptors arrive as (batch, channel, 1, 1).
            nn.Flatten(),
            nn.Linear(in_features=channel_in, out_features=hidden),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=hidden, out_features=channel_in),
        )

    def forward(self, x):
        '''Forward Propagation.

        Args:
            x: 4-D feature map whose dim 1 has ``channel_in`` entries.

        Returns:
            Tensor of the same shape as ``x``: the input multiplied
            element-wise by the per-channel attention weights.
        '''
        channel_attentions = []
        for pool_type in self.pool_types:
            if pool_type == 'avg':
                pooled = nn.functional.adaptive_avg_pool2d(x, 1)
            elif pool_type == 'max':
                pooled = nn.functional.adaptive_max_pool2d(x, 1)
            else:
                continue  # unknown names are skipped
            channel_attentions.append(self.shared_mlp(pooled))

        pooling_sums = torch.stack(channel_attentions, dim=0).sum(dim=0)
        scaled = torch.sigmoid(pooling_sums).unsqueeze(2).unsqueeze(3).expand_as(x)
        return x * scaled
class ChannelPool(nn.Module):
    '''Collapse all channels of a feature map into two channels.

    The first output channel holds the maximum over dim 1 of the input and
    the second holds the mean over dim 1.
    '''

    def forward(self, x):
        '''Return the max map and the mean map concatenated along dim 1.'''
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        mean_map = torch.mean(x, dim=1, keepdim=True)
        return torch.cat((max_map, mean_map), dim=1)
class Spatial_Attention(nn.Module):
    '''Spatial attention branch of CBAM.

    The input is compressed to two channels by ``ChannelPool``, convolved down
    to a single-channel map, batch-normalised, and the sigmoid of that map
    rescales the input.
    '''

    def __init__(self, kernel_size=7):
        '''Spatial Attention Architecture.

        Args:
            kernel_size: Side length of the convolution kernel. Padding is
                ``(kernel_size - 1) // 2``, which preserves the spatial size
                for odd kernel sizes.
        '''
        super(Spatial_Attention, self).__init__()

        self.compress = ChannelPool()
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(in_channels=2, out_channels=1, kernel_size=kernel_size, stride=1,
                      dilation=1, padding=(kernel_size - 1) // 2, bias=False),
            nn.BatchNorm2d(num_features=1, eps=1e-5, momentum=0.01, affine=True),
        )

    def forward(self, x):
        '''Forward Propagation.

        Returns:
            ``x`` multiplied by the single-channel attention map (the map is
            broadcast over dim 1).
        '''
        x_compress = self.compress(x)
        x_output = self.spatial_attention(x_compress)
        scaled = torch.sigmoid(x_output)
        return x * scaled
class CBAM(nn.Module):
    '''CBAM architecture: channel attention, optionally followed by spatial attention.'''

    def __init__(self, channel_in, reduction_ratio=16, pool_types=('avg', 'max'), spatial=True):
        '''Param init and arch build.

        Args:
            channel_in: Number of channels of the input feature map.
            reduction_ratio: Forwarded to ``Channel_Attention``.
            pool_types: Forwarded to ``Channel_Attention``.
            spatial: When true, a ``Spatial_Attention`` stage with a 7x7
                kernel is applied after the channel stage.
        '''
        super(CBAM, self).__init__()
        self.spatial = spatial

        self.channel_attention = Channel_Attention(
            channel_in=channel_in, reduction_ratio=reduction_ratio, pool_types=pool_types)

        if self.spatial:
            self.spatial_attention = Spatial_Attention(kernel_size=7)

    def forward(self, x):
        '''Forward Propagation.

        Returns:
            The attention-refined feature map, same shape as ``x``.
        '''
        x_out = self.channel_attention(x)
        if self.spatial:
            x_out = self.spatial_attention(x_out)
        return x_out
import numpy as np
import models, torch, copy
class Client(object):
    """One federated-learning participant that trains on its own slice of the data.

    Configuration keys read from ``conf``: ``model_name``, ``no_models``,
    ``batch_size``, ``lr``, ``momentum``, ``local_epochs``, ``dp`` and, when
    ``dp`` is truthy, ``C`` (the clipping bound).
    """

    def __init__(self, conf, model, train_dataset, id=-1):
        """Create the local model and a loader over this client's data slice.

        Args:
            conf: Configuration mapping (see class docstring).
            model: Unused here; the global model is passed to ``local_train``.
            train_dataset: Indexable dataset shared by all clients.
            id: Client index; selects the ``id``-th contiguous slice of
                ``len(train_dataset) // no_models`` samples.
        """
        self.conf = conf
        self.local_model = models.get_model(self.conf["model_name"])
        self.client_id = id
        self.train_dataset = train_dataset

        all_range = list(range(len(self.train_dataset)))
        data_len = len(self.train_dataset) // self.conf['no_models']
        train_indices = all_range[id * data_len: (id + 1) * data_len]

        self.train_loader = torch.utils.data.DataLoader(
            self.train_dataset,
            batch_size=conf["batch_size"],
            sampler=torch.utils.data.sampler.SubsetRandomSampler(train_indices),
        )

    def local_train(self, model):
        """Train the local model starting from ``model`` and return the weight delta.

        Args:
            model: The current global model; its state is copied into the
                local model before training and is not modified.

        Returns:
            dict mapping each state-dict entry name to
            ``local_value - global_value`` after training.
        """
        # state_dict() tensors share storage with the model, so one lookup
        # per model is enough for the whole call.
        global_state = model.state_dict()
        local_state = self.local_model.state_dict()
        for name, param in global_state.items():
            local_state[name].copy_(param)

        optimizer = torch.optim.SGD(self.local_model.parameters(), lr=self.conf['lr'],
                                    momentum=self.conf['momentum'])

        self.local_model.train()
        for e in range(self.conf["local_epochs"]):
            for batch_id, batch in enumerate(self.train_loader):
                data, target = batch

                if torch.cuda.is_available():
                    data = data.cuda()
                    target = target.cuda()

                optimizer.zero_grad()
                output = self.local_model(data)
                loss = torch.nn.functional.cross_entropy(output, target)
                loss.backward()
                optimizer.step()

                if self.conf["dp"]:
                    # Clip the local update so its norm is at most C.
                    model_norm = models.model_norm(model, self.local_model)
                    # A zero norm means no update happened; avoid dividing by it.
                    norm_scale = 1 if model_norm == 0 else min(1, self.conf['C'] / model_norm)
                    for name, layer in self.local_model.named_parameters():
                        clipped_difference = norm_scale * (layer.data - global_state[name])
                        layer.data.copy_(global_state[name] + clipped_difference)

            print("Epoch %d done." % e)

        diff = dict()
        for name, data in self.local_model.state_dict().items():
            diff[name] = (data - global_state[name])
        return diff
import models, torch, copy
class Client(object):
    """Federated-learning client: owns a local model and one slice of the training set.

    Configuration keys read from ``conf``: ``model_name``, ``no_models``,
    ``batch_size``, ``lr``, ``momentum``, ``local_epochs``, ``dp`` and, when
    ``dp`` is truthy, ``C`` (the clipping bound).
    """

    def __init__(self, conf, model, train_dataset, id=-1):
        """Build the local model and the data loader for this client.

        Args:
            conf: Configuration mapping (see class docstring).
            model: Not used by the constructor.
            train_dataset: Indexable dataset shared by all clients.
            id: Client index; picks the ``id``-th contiguous block of
                ``len(train_dataset) // no_models`` sample indices.
        """
        self.conf = conf
        self.local_model = models.get_model(self.conf["model_name"])
        self.client_id = id
        self.train_dataset = train_dataset

        all_range = list(range(len(self.train_dataset)))
        data_len = len(self.train_dataset) // self.conf['no_models']
        train_indices = all_range[id * data_len: (id + 1) * data_len]

        self.train_loader = torch.utils.data.DataLoader(
            self.train_dataset,
            batch_size=conf["batch_size"],
            sampler=torch.utils.data.sampler.SubsetRandomSampler(train_indices),
        )

    def local_train(self, model):
        """Run local SGD from the global weights and return the resulting update.

        Args:
            model: Global model whose state initialises the local model. It
                is read but never written.

        Returns:
            dict of ``local - global`` tensors keyed by state-dict entry name.
        """
        # One state_dict() lookup per model; the returned tensors alias the
        # live parameters and buffers.
        global_state = model.state_dict()
        local_state = self.local_model.state_dict()
        for name, param in global_state.items():
            local_state[name].copy_(param)

        optimizer = torch.optim.SGD(self.local_model.parameters(), lr=self.conf['lr'],
                                    momentum=self.conf['momentum'])

        self.local_model.train()
        for e in range(self.conf["local_epochs"]):
            for batch_id, batch in enumerate(self.train_loader):
                data, target = batch

                if torch.cuda.is_available():
                    data = data.cuda()
                    target = target.cuda()

                optimizer.zero_grad()
                output = self.local_model(data)
                loss = torch.nn.functional.cross_entropy(output, target)
                loss.backward()
                optimizer.step()

                if self.conf["dp"]:
                    # Scale the accumulated update down so its norm stays within C.
                    model_norm = models.model_norm(model, self.local_model)
                    # Guard the division: a zero norm needs no clipping.
                    norm_scale = 1 if model_norm == 0 else min(1, self.conf['C'] / model_norm)
                    for name, layer in self.local_model.named_parameters():
                        clipped_difference = norm_scale * (layer.data - global_state[name])
                        layer.data.copy_(global_state[name] + clipped_difference)

            print("Epoch %d done." % e)

        diff = dict()
        for name, data in self.local_model.state_dict().items():
            diff[name] = (data - global_state[name])
        return diff
import torch
import torch.nn as nn
from einops import rearrange
from einops.layers.torch import Rearrange
def conv_3x3_bn(inp, oup, image_size, downsample=False):
    """Build the stem block: 3x3 convolution, batch norm, GELU.

    Args:
        inp: Number of input channels.
        oup: Number of output channels.
        image_size: Accepted so the signature matches the other block
            constructors; not used.
        downsample: When truthy the convolution uses stride 2, else stride 1.

    Returns:
        An ``nn.Sequential`` holding the three layers.
    """
    stride = 2 if downsample else 1
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.GELU(),
    )
class PreNorm(nn.Module):
    """Apply a normalisation layer before calling the wrapped function."""

    def __init__(self, dim, fn, norm):
        """
        Args:
            dim: Argument passed to the ``norm`` constructor.
            fn: Callable applied to the normalised input.
            norm: Normalisation layer class (called as ``norm(dim)``).
        """
        super().__init__()
        self.norm = norm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        """Return ``fn(norm(x), **kwargs)``."""
        return self.fn(self.norm(x), **kwargs)
class SE(nn.Module):
    """Squeeze-and-excitation gate: global average pool, bottleneck MLP, channel rescale."""

    def __init__(self, inp, oup, expansion=0.25):
        """
        Args:
            inp: Channel count used, together with ``expansion``, to size the
                bottleneck (``int(inp * expansion)`` hidden units).
            oup: Channel count of the tensor this module is applied to.
            expansion: Bottleneck ratio.
        """
        super().__init__()
        hidden = int(inp * expansion)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(oup, hidden, bias=False),
            nn.GELU(),
            nn.Linear(hidden, oup, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Scale each channel of the 4-D input ``x`` by its learned gate."""
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y
class FeedForward(nn.Module):
    """Two-layer MLP with GELU and dropout, applied over the last dimension."""

    def __init__(self, dim, hidden_dim, dropout=0.):
        """
        Args:
            dim: Input and output feature size.
            hidden_dim: Hidden layer size.
            dropout: Dropout probability used after each linear layer.
        """
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        """Return the MLP output; the shape matches ``x``."""
        return self.net(x)
class MBConv(nn.Module):
    """Inverted-residual convolution block with pre-normalisation and a skip path."""

    def __init__(self, inp, oup, image_size, downsample=False, expansion=4):
        """
        Args:
            inp: Number of input channels.
            oup: Number of output channels.
            image_size: Accepted for signature parity with the other blocks;
                not used here.
            downsample: When truthy the block halves the spatial size and the
                skip path becomes max-pool followed by a 1x1 projection.
            expansion: Channel expansion factor for the hidden width.
        """
        super().__init__()
        self.downsample = downsample
        stride = 2 if self.downsample else 1
        hidden_dim = int(inp * expansion)

        if self.downsample:
            self.pool = nn.MaxPool2d(3, 2, 1)
            self.proj = nn.Conv2d(inp, oup, 1, 1, 0, bias=False)

        if expansion == 1:
            self.conv = nn.Sequential(
                # dw
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride,
                          1, groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.GELU(),
                # pw-linear
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
            )
        else:
            self.conv = nn.Sequential(
                # pw
                # down-sample in the first conv
                nn.Conv2d(inp, hidden_dim, 1, stride, 0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.GELU(),
                # dw
                nn.Conv2d(hidden_dim, hidden_dim, 3, 1, 1,
                          groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.GELU(),
                SE(inp, hidden_dim),
                # pw-linear
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
            )

        self.conv = PreNorm(inp, self.conv, nn.BatchNorm2d)

    def forward(self, x):
        """Return the skip path plus the convolution path."""
        if self.downsample:
            return self.proj(self.pool(x)) + self.conv(x)
        # Without downsampling the identity skip needs inp == oup.
        return x + self.conv(x)
class Attention(nn.Module):
    """Multi-head self-attention with a learned relative position bias."""

    def __init__(self, inp, oup, image_size, heads=8, dim_head=32, dropout=0.):
        """
        Args:
            inp: Feature size of each input token.
            oup: Feature size of each output token.
            image_size: ``(height, width)`` of the token grid; fixes the size
                of the relative-bias table and index.
            heads: Number of attention heads.
            dim_head: Feature size per head.
            dropout: Dropout probability on the output projection.
        """
        super().__init__()
        inner_dim = dim_head * heads
        project_out = not (heads == 1 and dim_head == inp)

        self.ih, self.iw = image_size

        self.heads = heads
        self.scale = dim_head ** -0.5

        # parameter table of relative position bias
        self.relative_bias_table = nn.Parameter(
            torch.zeros((2 * self.ih - 1) * (2 * self.iw - 1), heads))

        coords = torch.meshgrid((torch.arange(self.ih), torch.arange(self.iw)))
        coords = torch.flatten(torch.stack(coords), 1)
        relative_coords = coords[:, :, None] - coords[:, None, :]

        # Shift both offsets to be non-negative, then fold (dy, dx) into one
        # flat index into the bias table.
        relative_coords[0] += self.ih - 1
        relative_coords[1] += self.iw - 1
        relative_coords[0] *= 2 * self.iw - 1
        relative_coords = rearrange(relative_coords, 'c h w -> h w c')
        relative_index = relative_coords.sum(-1).flatten().unsqueeze(1)
        self.register_buffer("relative_index", relative_index)

        self.attend = nn.Softmax(dim=-1)
        self.to_qkv = nn.Linear(inp, inner_dim * 3, bias=False)

        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, oup),
            nn.Dropout(dropout),
        ) if project_out else nn.Identity()

    def forward(self, x):
        """Attend over tokens.

        Args:
            x: Tensor laid out as (batch, tokens, features); the token count
                must equal ``ih * iw`` for the bias to broadcast.

        Returns:
            Tensor laid out as (batch, tokens, features) after the output
            projection.
        """
        qkv = self.to_qkv(x).chunk(3, dim=-1)
        q, k, v = (rearrange(t, 'b n (h d) -> b h n d', h=self.heads) for t in qkv)

        dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale

        # Use "gather" for more efficiency on GPUs
        relative_bias = self.relative_bias_table.gather(
            0, self.relative_index.repeat(1, self.heads))
        relative_bias = rearrange(
            relative_bias, '(h w) c -> 1 c h w', h=self.ih * self.iw, w=self.ih * self.iw)
        dots = dots + relative_bias

        attn = self.attend(dots)
        out = torch.matmul(attn, v)
        out = rearrange(out, 'b h n d -> b n (h d)')
        out = self.to_out(out)
        return out
class Transformer(nn.Module):
    """Transformer block on image-shaped tensors: attention then feed-forward, both residual."""

    def __init__(self, inp, oup, image_size, heads=8, dim_head=32, downsample=False, dropout=0.):
        """
        Args:
            inp: Number of input channels.
            oup: Number of output channels.
            image_size: ``(height, width)`` of the feature map the attention
                operates on (after any downsampling).
            heads: Number of attention heads.
            dim_head: Feature size per head.
            downsample: When truthy, both the skip path and the attention
                path max-pool the input, and the skip path is projected to
                ``oup`` channels.
            dropout: Dropout probability forwarded to the sub-modules.
        """
        super().__init__()
        hidden_dim = int(inp * 4)

        self.ih, self.iw = image_size
        self.downsample = downsample

        if self.downsample:
            self.pool1 = nn.MaxPool2d(3, 2, 1)
            self.pool2 = nn.MaxPool2d(3, 2, 1)
            self.proj = nn.Conv2d(inp, oup, 1, 1, 0, bias=False)

        self.attn = Attention(inp, oup, image_size, heads, dim_head, dropout)
        self.ff = FeedForward(oup, hidden_dim, dropout)

        # Wrap both sub-modules so they accept and return (b, c, ih, iw).
        self.attn = nn.Sequential(
            Rearrange('b c ih iw -> b (ih iw) c'),
            PreNorm(inp, self.attn, nn.LayerNorm),
            Rearrange('b (ih iw) c -> b c ih iw', ih=self.ih, iw=self.iw),
        )

        self.ff = nn.Sequential(
            Rearrange('b c ih iw -> b (ih iw) c'),
            PreNorm(oup, self.ff, nn.LayerNorm),
            Rearrange('b (ih iw) c -> b c ih iw', ih=self.ih, iw=self.iw),
        )

    def forward(self, x):
        """Return the block output for a 4-D feature map ``x``."""
        if self.downsample:
            x = self.proj(self.pool1(x)) + self.attn(self.pool2(x))
        else:
            x = x + self.attn(x)
        x = x + self.ff(x)
        return x
class CoAtNet(nn.Module):
    """Five-stage CoAtNet classifier: a conv stem, four configurable stages, pooling and a linear head."""

    def __init__(self, image_size, in_channels, num_blocks, channels, num_classes=1000,
                 block_types=('C', 'C', 'T', 'T')):
        """
        Args:
            image_size: ``(height, width)`` of the input images.
            in_channels: Number of input image channels.
            num_blocks: Five integers, the depth of stages s0..s4.
            channels: Five integers, the output width of stages s0..s4.
            num_classes: Size of the classification output.
            block_types: Four entries, each 'C' (``MBConv``) or 'T'
                (``Transformer``), selecting the block used in s1..s4.
        """
        super().__init__()
        ih, iw = image_size
        block = {'C': MBConv, 'T': Transformer}

        self.s0 = self._make_layer(
            conv_3x3_bn, in_channels, channels[0], num_blocks[0], (ih // 2, iw // 2))
        self.s1 = self._make_layer(
            block[block_types[0]], channels[0], channels[1], num_blocks[1], (ih // 4, iw // 4))
        self.s2 = self._make_layer(
            block[block_types[1]], channels[1], channels[2], num_blocks[2], (ih // 8, iw // 8))
        self.s3 = self._make_layer(
            block[block_types[2]], channels[2], channels[3], num_blocks[3], (ih // 16, iw // 16))
        self.s4 = self._make_layer(
            block[block_types[3]], channels[3], channels[4], num_blocks[4], (ih // 32, iw // 32))

        # Pool over the whole final map; identical to the square kernel
        # ih // 32 for square inputs and also correct when ih != iw.
        self.pool = nn.AvgPool2d((ih // 32, iw // 32), 1)
        self.fc = nn.Linear(channels[-1], num_classes, bias=False)

    def forward(self, x):
        """Return class scores laid out as (batch, num_classes)."""
        x = self.s0(x)
        x = self.s1(x)
        x = self.s2(x)
        x = self.s3(x)
        x = self.s4(x)

        x = self.pool(x).view(-1, x.shape[1])
        x = self.fc(x)
        return x

    def _make_layer(self, block, inp, oup, depth, image_size):
        """Stack ``depth`` blocks; only the first one downsamples and changes width."""
        layers = [
            block(inp, oup, image_size, downsample=True) if i == 0
            else block(oup, oup, image_size)
            for i in range(depth)
        ]
        return nn.Sequential(*layers)
def coatnet_0():
    """Build the CoAtNet-0 configuration for 224x224 RGB input and 1000 classes."""
    num_blocks = [2, 2, 3, 5, 2]            # L
    channels = [64, 96, 192, 384, 768]      # D
    return CoAtNet((224, 224), 3, num_blocks, channels, num_classes=1000)
def coatnet_1():
    """Build the CoAtNet-1 configuration for 224x224 RGB input and 1000 classes."""
    num_blocks = [2, 2, 6, 14, 2]           # L
    channels = [64, 96, 192, 384, 768]      # D
    return CoAtNet((224, 224), 3, num_blocks, channels, num_classes=1000)
def coatnet_2():
    """Build the CoAtNet-2 configuration for 224x224 RGB input and 1000 classes.

    NOTE(review): the last stage width was previously written as 1026, which
    looks like a typo for the 1024 given in the CoAtNet paper. Checkpoints
    saved with the old width will not load into this model; confirm before
    merging if any exist.
    """
    num_blocks = [2, 2, 6, 14, 2]           # L
    channels = [128, 128, 256, 512, 1024]   # D
    return CoAtNet((224, 224), 3, num_blocks, channels, num_classes=1000)
def coatnet_3():
    """Build the CoAtNet-3 configuration for 224x224 RGB input and 1000 classes."""
    num_blocks = [2, 2, 6, 14, 2]           # L
    channels = [192, 192, 384, 768, 1536]   # D
    return CoAtNet((224, 224), 3, num_blocks, channels, num_classes=1000)
def coatnet_4():
    """Build the CoAtNet-4 configuration for 224x224 RGB input and 1000 classes."""
    num_blocks = [2, 2, 12, 28, 2]          # L
    channels = [192, 192, 384, 768, 1536]   # D
    return CoAtNet((224, 224), 3, num_blocks, channels, num_classes=1000)
def count_parameters(model) -> int:
    """Return the total number of elements in ``model``'s trainable parameters.

    Only parameters with ``requires_grad`` set are counted.
    """
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
if __name__ == '__main__':
    # Smoke test: push one random image through every variant and print the
    # output shape and trainable-parameter count.
    img = torch.randn(1, 3, 224, 224)

    for build in (coatnet_0, coatnet_1, coatnet_2, coatnet_3, coatnet_4):
        net = build()
        # No gradients are needed for a shape check.
        with torch.no_grad():
            out = net(img)
        print(out.shape, count_parameters(net))
import torch
import torch.nn as nn
import math
import torch.nn.functional as F
class h_sigmoid(nn.Module):
    """Hard sigmoid: ``ReLU6(x + 3) / 6``."""

    def __init__(self, inplace=True):
        """
        Args:
            inplace: Forwarded to ``nn.ReLU6``. The activation is applied to
                the temporary ``x + 3``, so the caller's tensor is not
                modified.
        """
        super(h_sigmoid, self).__init__()
        self.relu = nn.ReLU6(inplace=inplace)

    def forward(self, x):
        """Return the hard-sigmoid of ``x``, element-wise."""
        return self.relu(x + 3) / 6
class h_swish(nn.Module):
    """Hard swish: ``x * h_sigmoid(x)``."""

    def __init__(self, inplace=True):
        """
        Args:
            inplace: Forwarded to the inner ``h_sigmoid``.
        """
        super(h_swish, self).__init__()
        self.sigmoid = h_sigmoid(inplace=inplace)

    def forward(self, x):
        """Return the hard-swish of ``x``, element-wise."""
        return x * self.sigmoid(x)
class CoordAtt(nn.Module):
    """Coordinate attention: separate gates along height and width, multiplied onto the input."""

    def __init__(self, inp, oup, reduction=32):
        """
        Args:
            inp: Number of input channels.
            oup: Number of channels of the two attention maps. The maps are
                multiplied with the input, so this must match ``inp`` (or
                broadcast against it).
            reduction: Divisor for the intermediate width, which is
                ``max(8, inp // reduction)``.
        """
        super(CoordAtt, self).__init__()
        self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
        self.pool_w = nn.AdaptiveAvgPool2d((1, None))

        mip = max(8, inp // reduction)

        self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(mip)
        self.act = h_swish()

        self.conv_h = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
        self.conv_w = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Return ``x`` rescaled by the height and width attention maps."""
        identity = x

        _, _, h, w = x.size()
        x_h = self.pool_h(x)
        # Transpose the width descriptor so both can be stacked along dim 2.
        x_w = self.pool_w(x).permute(0, 1, 3, 2)

        y = torch.cat([x_h, x_w], dim=2)
        y = self.conv1(y)
        y = self.bn1(y)
        y = self.act(y)

        x_h, x_w = torch.split(y, [h, w], dim=2)
        x_w = x_w.permute(0, 1, 3, 2)

        a_h = self.conv_h(x_h).sigmoid()
        a_w = self.conv_w(x_w).sigmoid()

        out = identity * a_w * a_h
        return out
import argparse, json
import datetime
import os
import logging
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import torch, random
from server import *
from client import *
import models, datasets
from torchvision.datasets import ImageFolder
import torch
from torchvision import transforms, datasets
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision
import matplotlib.pyplot as plt
import numpy as np
from log import get_log
from torch import randperm
import os
logger = get_log('/home/ykn/cds/chapter03_Python_image_classification/log/log.txt')


def _log_error_metrics(mse, rmse, mae, mape):
    """Write the four error metrics to ``logger``.

    NOTE(review): these four logging calls were bare module-level statements
    whose inputs (``mse``, ``rmse``, ``mae``, ``mape``) are not defined
    anywhere in view, so they would raise ``NameError`` on import. They are
    kept here as a helper; confirm where they are meant to be called from.
    """
    logger.info("MSE: %.6f" % (mse))
    logger.info("RMSE: %.6f" % (rmse))
    logger.info("MAE: %.6f" % (mae))
    logger.info("MAPE: %.6f" % (mape))


# Image preprocessing shared by the training and test sets.
# NOTE: this rebinds the name ``transforms`` from the torchvision module to
# the composed pipeline; the module is no longer reachable under that name.
transforms = transforms.Compose([
    transforms.Resize(256),      # scale the shorter side to 256, keeping the aspect ratio
    transforms.CenterCrop(224),  # crop the central 224x224 region
    transforms.ToTensor(),       # convert the image to a tensor
])
if __name__ == '__main__':
    # Entry point: load the config, build the datasets, then run the
    # federated rounds (local training -> aggregation -> evaluation).
    parser = argparse.ArgumentParser(description='Federated Learning')
    # '-c' is accepted as a short alias of '--conf'.
    parser.add_argument('-c', '--conf', default='/home/ykn/cds/chapter03_Python_image_classification/utils/conf.json', dest='conf')
    args = parser.parse_args()

    with open(args.conf, 'r') as f:
        conf = json.load(f)

    path1 = '/home/ykn/cds/chapter03_Python_image_classification/data/Brain Tumor MRI Dataset/archive/Training'
    path2 = '/home/ykn/cds/chapter03_Python_image_classification/data/Brain Tumor MRI Dataset/archive/Testing'
    data_train = datasets.ImageFolder(path1, transform=transforms)
    data_test = datasets.ImageFolder(path2, transform=transforms)

    # Shuffle once up front so each client's contiguous index slice is a
    # random subset of the images.
    lenth_train = randperm(len(data_train)).tolist()
    data_train_shuffle = torch.utils.data.Subset(data_train, lenth_train)
    lenth_test = randperm(len(data_test)).tolist()
    data_test_shuffle = torch.utils.data.Subset(data_test, lenth_test)

    train_datasets, eval_datasets = data_train_shuffle, data_test_shuffle

    server = Server(conf, eval_datasets)
    clients = [
        Client(conf, server.global_model, train_datasets, c)
        for c in range(conf["no_models"])
    ]

    for e in range(conf["global_epochs"]):
        # One accumulator per round, zeroed before any client contributes, so
        # every participating client's update is included in the aggregate.
        weight_accumulator = {
            name: torch.zeros_like(params)
            for name, params in server.global_model.state_dict().items()
        }

        # NOTE(review): the first k clients are used every round. The README
        # describes picking k clients at random each round, which would be
        # random.sample(clients, conf['k']); confirm which is intended.
        for client in clients[:conf['k']]:
            diff = client.local_train(server.global_model)
            for name in weight_accumulator:
                weight_accumulator[name].add_(diff[name])

        server.model_aggregate(weight_accumulator)

        acc, loss = server.model_eval()
        print("Epoch %d, acc: %f, loss: %f\n" % (e, acc, loss))
