# Main network
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torch.nn.functional import normalize
import numpy as np
import pickle
import glob
from timeit import default_timer as timer
import time
import warnings

# Try the BiGRU-based network
from model import *

# Do not print warning messages
# warnings.filterwarnings("ignore")

"""
Notes on past mistakes (they exposed gaps in the fundamentals):
Cross entropy: https://blog.csdn.net/dss_dssssd/article/details/84036913
Label to one-hot: https://blog.csdn.net/qq_34914551/article/details/88700334

Experiment with adaptive clustering.
"""

LSTM_UNITS = 92
MINI_BATCH = 10
TRAIN_STEPS_PER_EPOCH = 12000
VALIDATION_STEPS_PER_EPOCH = 800
DATA_DIR = 'D:\\PyProject\\malware_traffic\\3_Packet\\'
CHECKPOINTS_DIR = './checkpoints/'

dict_5class = {0: 'Normal', 1: 'BFSSH', 2: 'Infilt', 3: 'HttpDoS', 4: 'DDoS'}
class_num = 5

PACKET_NUM_PER_SESSION = 14
PACKET_LEN = 100

class ByteBlock(nn.Module):
    """
    1-D convolutional block: stacked Conv1d + Tanh (+ optional MaxPool1d),
    followed by global max pooling and a 128-dimensional projection.
    """
    def __init__(self, in_channels, nb_filter=(64, 100), filter_length=(3, 3),
                 subsample=(2, 1), pool_length=(2, 2)):
        super(ByteBlock, self).__init__()

        layers = []
        for i in range(len(nb_filter)):
            layers.append(nn.Conv1d(in_channels, nb_filter[i], kernel_size=filter_length[i],
                                    padding=0, stride=subsample[i]))
            layers.append(nn.Tanh())
            if pool_length[i]:
                layers.append(nn.MaxPool1d(pool_length[i]))
            in_channels = nb_filter[i]

        self.block = nn.Sequential(*layers)
        self.global_pool = nn.AdaptiveMaxPool1d(1)
        self.fc = nn.Linear(nb_filter[-1], 128)

    def forward(self, x):
        x = self.block(x)
        x = self.global_pool(x).squeeze(dim=2)
        x = torch.nn.functional.relu(self.fc(x))
        return x

def update_confusion_matrix(confusion_matrix, actual_lb, predict_lb):
    for idx, value in enumerate(actual_lb):
        p_value = predict_lb[idx]
        confusion_matrix[value, p_value] += 1
    return confusion_matrix


def truncate(f, n):
    # Truncate (not round) f to n decimal places; np.floor replaces the np.math
    # alias, which has been removed from recent NumPy releases.
    trunc_f = np.floor(f * 10 ** n) / 10 ** n
    return '{:.{prec}f}'.format(trunc_f, prec=n)


def binarize(x, sz=256):
    # Unused helper kept for reference: one-hot encode integer byte values.
    return torch.nn.functional.one_hot(x, sz).float()

class OneHotEncodingLayer(nn.Module):
    """One-hot encode integer byte values into float vectors of length sz."""
    def __init__(self, sz=256):
        super(OneHotEncodingLayer, self).__init__()
        self.size = sz

    def forward(self, x):
        return torch.nn.functional.one_hot(x, num_classes=self.size).float()

class MyDataset(data.Dataset):
    """
    Lays each session out as a fixed-size [PACKET_NUM_PER_SESSION, PACKET_LEN]
    grid of byte values; the one-hot encoding itself happens inside the model.
    """
    def __init__(self, sessions, labels, indices):
        self.sessions = sessions
        self.labels = labels
        self.indices = indices
        self.packet_num_per_session = PACKET_NUM_PER_SESSION
        self.packet_len = PACKET_LEN

    def __getitem__(self, index):
        idx = self.indices[index]
        # Pad with zeros: negative padding values would break the one-hot
        # encoding layer, since one_hot rejects negative class indices.
        X = torch.zeros((self.packet_num_per_session, self.packet_len), dtype=torch.int64)

        for i, packet in enumerate(self.sessions[idx]):
            if i < self.packet_num_per_session:  # keep only the first packet_num_per_session packets of a session
                for j, byte in enumerate(packet[:self.packet_len]):  # keep only the first PACKET_LEN bytes of a packet
                    X[i, (self.packet_len - 1 - j)] = byte

        # label = self.labels[idx].astype(np.int64)  # CrossEntropyLoss takes integer class labels directly
        # y = torch.nn.functional.one_hot(torch.from_numpy(label), num_classes=5)[0]
        y = self.labels[idx][0].astype(np.int64)
        return X, y

    def __len__(self):
        return len(self.indices)
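

# Illustrative sketch (toy data, not used by the pipeline below): shows how
# __getitem__ lays a session out on the fixed byte grid, truncating long packets
# and writing each packet's bytes in reverse order from the right-hand edge.
def _dataset_layout_demo():
    toy_sessions = [[[10, 20, 30], list(range(200))]]  # one session with two packets
    toy_labels = np.array([[3]])                       # its class label
    ds = MyDataset(toy_sessions, toy_labels, indices=[0])
    X, y = ds[0]
    # X.shape == [14, 100]; X[0, -3:] == tensor([30, 20, 10]); y == 3
    return X, y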


class FEMnet(nn.Module):
    """
    Feature extraction module: per-packet CNN encoders followed by a
    bidirectional GRU with attention over the packet sequence.
    """
    def __init__(self, flow_len, packet_len, gru_units):
        super(FEMnet, self).__init__()
        self.packet_len = packet_len
        self.flow_len = flow_len
        self.batch_size = 10
        self.gru_hidden_size = gru_units
        self.rep_dim = gru_units

        self.embedding = OneHotEncodingLayer(sz=256)  # one-hot byte encoding
        self.block2 = ByteBlock(self.packet_len, (128, 256), (5, 5), (1, 1), (2, 2))
        self.block3 = ByteBlock(self.packet_len, (192, 320), (7, 5), (1, 1), (2, 2))

        self.lstm_layer = nn.GRU(256, self.gru_hidden_size, dropout=0.1, bidirectional=True)
        self.dense_layer = nn.Linear(self.gru_hidden_size * 2, 5)
        # self.output = nn.Softmax(dim=1)  # not needed: CrossEntropyLoss applies log-softmax internally

    def forward(self, x):  # x: [batch_size, flow_len, packet_len] = [10, 14, 100]
        embeddings_list = self.embedding(x)  # [10, 14, 100, 256]
        # Allocate on the input device so GPU inputs do not hit a CPU buffer,
        # and size by the actual batch rather than the hard-coded batch_size.
        encoder_list = torch.zeros((x.size(0), self.flow_len, 256), device=x.device)
        for ix, embeddings in enumerate(embeddings_list):  # [14, 100, 256]
            encoder1 = self.block2(embeddings)  # [14, 128]
            encoder2 = self.block3(embeddings)  # [14, 128]
            encoder = torch.cat([encoder1, encoder2], 1)  # [14, 256]
            encoder_list[ix] = encoder

        # nn.GRU (batch_first=False) expects [seq_len, batch_size, input_size]
        encoder_list = encoder_list.permute(1, 0, 2)  # [10, 14, 256] -> [14, 10, 256]
        biLSTM, final_hidden_state = self.lstm_layer(encoder_list)  # [14, 10, 184], [2, 10, 92]
        biLSTM = biLSTM.permute(1, 0, 2)  # [10, 14, 184]
        attn_output, attention = self.attention_net(biLSTM, final_hidden_state)
        dense = self.dense_layer(attn_output)  # [10, 5] class logits
        return dense

    # lstm_output : [batch_size, n_step, gru_hidden_size * num_directions(=2)], the F matrix
    def attention_net(self, lstm_output, final_state):
        # hidden : [batch_size, gru_hidden_size * num_directions(=2), 1]
        hidden = final_state.view(-1, self.gru_hidden_size * 2, 1)  # -1 lets that dimension be inferred
        # attn_weights : [batch_size, n_step]
        attn_weights = torch.bmm(lstm_output, hidden).squeeze(2)  # score each time step, drop the last dim
        soft_attn_weights = torch.nn.functional.softmax(attn_weights, 1)
        # [batch_size, gru_hidden_size * 2, n_step] x [batch_size, n_step, 1]
        # = [batch_size, gru_hidden_size * 2, 1]
        context = torch.bmm(lstm_output.transpose(1, 2), soft_attn_weights.unsqueeze(2)).squeeze(2)
        return context, soft_attn_weights  # context : [batch_size, gru_hidden_size * num_directions(=2)]
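

# Illustrative shape check (not used by the pipeline below; dimensions taken from the
# comments above): with batch_size=10, 14 packets per flow and gru_units=92,
# attention_net collapses the [10, 14, 184] BiGRU outputs into a [10, 184] context
# vector plus [10, 14] attention weights.
def _attention_shape_demo():
    fem = FEMnet(PACKET_NUM_PER_SESSION, PACKET_LEN, LSTM_UNITS)
    lstm_output = torch.randn(10, 14, LSTM_UNITS * 2)  # BiGRU outputs
    final_state = torch.randn(2, 10, LSTM_UNITS)        # last hidden state, both directions
    context, weights = fem.attention_net(lstm_output, final_state)
    return context.shape, weights.shape  # torch.Size([10, 184]), torch.Size([10, 14])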


class MyModel(nn.Module):
    """
    Contrastive clustering wrapper: an instance-level projection head and a
    cluster-level projection head on top of the feature extraction module.
    """
    def __init__(self, binet, feature_dim, class_n):
        super(MyModel, self).__init__()
        self.binet = binet
        self.feature_dim = feature_dim
        self.cluster_num = class_n
        # Contrastive clustering heads
        self.instance_projector = nn.Sequential(  # instance-level head
            nn.Linear(self.binet.rep_dim, self.binet.rep_dim),
            nn.ReLU(),
            nn.Linear(self.binet.rep_dim, self.feature_dim),
        )
        self.cluster_projector = nn.Sequential(  # cluster-level head
            nn.Linear(self.binet.rep_dim, self.binet.rep_dim),
            nn.ReLU(),
            nn.Linear(self.binet.rep_dim, self.cluster_num),
            nn.Softmax(dim=1)
        )

    # Used during training
    def forward(self, x_i, x_j):  # x_i, x_j: two differently preprocessed views of the same data
        h_i = self.binet(x_i)
        h_j = self.binet(x_j)

        z_i = normalize(self.instance_projector(h_i), dim=1)
        z_j = normalize(self.instance_projector(h_j), dim=1)

        c_i = self.cluster_projector(h_i)
        c_j = self.cluster_projector(h_j)

        return z_i, z_j, c_i, c_j

    # Used at test time
    def forward_cluster(self, x):
        h = self.binet(x)
        c = self.cluster_projector(h)  # soft cluster assignment
        c = torch.argmax(c, dim=1)  # hard cluster label for each sample
        return c
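

# Illustrative sketch (not used by the pipeline below): the two projection heads
# consume rep_dim-dimensional backbone features; with random stand-in features this
# shows the expected head output shapes (128-d instance embeddings and 5-way soft
# cluster assignments).
def _projection_head_demo():
    fem = FEMnet(PACKET_NUM_PER_SESSION, PACKET_LEN, LSTM_UNITS)
    cc = MyModel(fem, 128, class_num)
    h = torch.randn(MINI_BATCH, fem.rep_dim)        # stand-in backbone features
    z = normalize(cc.instance_projector(h), dim=1)  # [10, 128]
    c = cc.cluster_projector(h)                     # [10, 5]
    return z.shape, c.shape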


def save_result(cf_ma, len_test, epochs=8):
    """
    Compute per-class metrics from the confusion matrix and append the
    train/test results to a dated text file.
    :param cf_ma: 5x5 confusion matrix (rows: actual, columns: predicted)
    :param len_test: number of evaluated samples
    :param epochs: number of training epochs (recorded in the report)
    :return:
    """
    metrics_list = []
    for i in range(5):
        if i == 0:
            metrics_list.append(
                [dict_5class[i], str(i), str(cf_ma[i, 0]), str(cf_ma[i, 1]), str(cf_ma[i, 2]), str(cf_ma[i, 3]),
                 str(cf_ma[i, 4]), '--', '--', '--'])
        else:
            acc = truncate((float(len_test - cf_ma[:, i].sum() - cf_ma[i, :].sum() + cf_ma[i, i] * 2) / len_test) * 100,
                           2)
            tpr = truncate((float(cf_ma[i, i]) / cf_ma[i].sum()) * 100, 2)
            fpr = truncate((float(cf_ma[0, i]) / cf_ma[0].sum()) * 100, 2)
            metrics_list.append(
                [dict_5class[i], str(i), str(cf_ma[i, 0]), str(cf_ma[i, 1]), str(cf_ma[i, 2]), str(cf_ma[i, 3]),
                 str(cf_ma[i, 4]), str(acc), str(tpr), str(fpr)])
    overall_acc = truncate(
        (float(cf_ma[0, 0] + cf_ma[1, 1] + cf_ma[2, 2] + cf_ma[3, 3] + cf_ma[4, 4]) / len_test) * 100, 2)
    overall_tpr = truncate((float(cf_ma[1, 1] + cf_ma[2, 2] + cf_ma[3, 3] + cf_ma[4, 4]) / cf_ma[1:].sum()) * 100, 2)
    overall_fpr = truncate((float(cf_ma[0, 1:].sum()) / cf_ma[0, :].sum()) * 100, 2)

    # Current date for the report file name
    current_date = time.strftime("%Y%m%d", time.localtime())
    file_name = f"evaluate_{current_date}.txt"

    with open(file_name, 'a') as f:
        f.write("\n")
        t = time.strftime('%Y-%m-%d %X', time.localtime())
        f.write(t + "\n")
        f.write('CLASS_NUM: 5\n')
        f.write('PACKET_LEN: ' + str(PACKET_LEN) + "\n")
        f.write('PACKET_NUM_PER_SESSION: ' + str(PACKET_NUM_PER_SESSION) + "\n")
        f.write('MINI_BATCH: ' + str(MINI_BATCH) + "\n")
        f.write('TRAIN_EPOCHS: ' + str(epochs) + "\n")
        f.write('DATA_DIR: ' + DATA_DIR + "\n")
        f.write("label\tindex\t0\t1\t2\t3\t4\tACC\tTPR\tFPR\n")
        for metrics in metrics_list:
            f.write('\t'.join(metrics) + "\n")
        f.write('Overall accuracy: ' + str(overall_acc) + "\n")
        f.write('Overall TPR: ' + str(overall_tpr) + "\n")
        f.write('Overall FPR: ' + str(overall_fpr) + "\n")
        # f.write('Train time(second): ' + str(int(train_time)) + "\n")
        # f.write('Test time(second): ' + str(int(test_time)) + "\n\n")
        f.write("\n\n")


def train(model, dataloader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    num = 0
    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()  # reset gradients

        outputs = model(inputs)
        num += 1
        # print("{}/{} outputs & label shape: ".format(num, len(dataloader)), outputs.shape, labels.shape)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)

    return running_loss / len(dataloader.dataset)


def evaluate(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    cf_ma = np.zeros((5, 5), dtype=int)
    num = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            predicted_labels = torch.argmax(outputs, dim=1).cpu().numpy()
            true_labels = labels.cpu().numpy()

            cf_ma = update_confusion_matrix(cf_ma, true_labels, predicted_labels)
            num += predicted_labels.shape[0]

    print("evaluated samples: ", num)
    save_result(cf_ma, num)
    return running_loss / len(dataloader.dataset), cf_ma


def load_data():
    """
    Load the pickled sessions and labels and build the train/test index split.
    :return: sessions, labels, train_indices, test_indices
    """
    t1 = timer()
    sessions = []
    labels = []
    num_pkls = len(glob.glob(DATA_DIR + 'ISCX2012_labels_*.pkl'))  # count the pickled label files
    for i in range(num_pkls):
        # if i != 1:
        #     continue
        session_pkl = DATA_DIR + 'ISCX2012_pcaps_' + str(i) + '.pkl'
        session_lists = pickle.load(open(session_pkl, 'rb'))  # deserialize
        sessions.extend(session_lists.values.tolist())

        label_pkl = DATA_DIR + 'ISCX2012_labels_' + str(i) + '.pkl'
        label_lists = pickle.load(open(label_pkl, 'rb'))
        labels.extend(label_lists.values.tolist())
        print(i)
    t2 = timer()
    print("load data time: ", t2 - t1)

    labels = np.array(labels)
    normal_indices = np.where(labels == 0)[0]  # row indices of normal traffic (an array)
    # The full normal class is too large to train on; subsample 100000 normal flows here
    # (better: cap the normal class during preprocessing to save memory).
    normal_indices = np.random.choice(normal_indices, 100000, replace=False)
    attack_indices = [np.where(labels == i)[0] for i in range(1, 5)]  # row indices of labels 1-4 (a list of arrays)
    # np.random.choice samples with replacement by default; pass replace=False to avoid duplicates
    test_normal_indices = np.random.choice(normal_indices, int(len(normal_indices) * 0.4), replace=False)
    test_attack_indices = np.concatenate(  # 40% of each attack class goes to the test split
        [np.random.choice(attack_indices[i], int(len(attack_indices[i]) * 0.4), replace=False) for i in range(4)])
    test_indices = np.concatenate([test_normal_indices, test_attack_indices]).astype(int)
    # train_indices = np.array(list(set(np.arange(len(labels))) - set(test_indices)))
    attack_indices = np.concatenate(attack_indices).astype(int)
    indices = np.concatenate([normal_indices, attack_indices]).astype(int)
    train_indices = np.array(list(set(indices) - set(test_indices)))

    return sessions, labels, train_indices, test_indices


# Hyperparameters
NUM_EPOCHS = 8
LEARNING_RATE = 0.001

# Build the models: FEMnet is the supervised 5-class classifier trained by the loop
# below; MyModel wraps the same backbone with the contrastive clustering heads and is
# kept for the clustering experiments (it is not trained in this script).
fe_model = FEMnet(PACKET_NUM_PER_SESSION, PACKET_LEN, LSTM_UNITS)
cc_model = MyModel(fe_model, 128, class_num)
model = fe_model  # the training loop below expects a single input and 5-class logits
# model = PLA_Attention_Model(100, 100, 50, 100, 14)

# Move the model to the GPU if one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Build the training and validation data loaders
sessions, labels, train_indices, test_indices = load_data()

train_dataset = MyDataset(sessions, labels, train_indices)  # num_workers left at 0: multi-process loading is problematic on Windows
train_dataloader = data.DataLoader(train_dataset, batch_size=MINI_BATCH, shuffle=True, drop_last=True)

val_dataset = MyDataset(sessions, labels, test_indices)  # drop_last=True discards incomplete batches, which would otherwise raise errors
val_dataloader = data.DataLoader(val_dataset, batch_size=MINI_BATCH, shuffle=False, drop_last=True)

# Training loop
start_time = timer()

for epoch in range(NUM_EPOCHS):
    # Train for one epoch
    train_loss = train(model, train_dataloader, criterion, optimizer, device)
    # Evaluate on the validation set
    val_loss, confusion_matrix = evaluate(model, val_dataloader, criterion, device)

    # Report the training/validation losses and the confusion matrix
    print(f"Epoch {epoch + 1}/{NUM_EPOCHS}")
    print(f"Train Loss: {truncate(train_loss, 4)}")
    print(f"Validation Loss: {truncate(val_loss, 4)}")
    print("Confusion Matrix:")
    for i in range(5):
        for j in range(5):
            print(confusion_matrix[i][j], end="\t")
        print()
    print("---------------------------------")

end_time = timer()
training_time = end_time - start_time

print(f"Training Time: {truncate(training_time, 2)} seconds")