# Main network
import glob
import math
import pickle
import time
import warnings
from timeit import default_timer as timer

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torch.nn.functional import normalize

# Try a BiGRU-based network
from model import *

# Suppress warning output
# warnings.filterwarnings("ignore")

"""
Notes on past mistakes (gaps in the fundamentals showed up here):
Cross entropy: https://blog.csdn.net/dss_dssssd/article/details/84036913
Converting labels to one-hot: https://blog.csdn.net/qq_34914551/article/details/88700334
Experiment with adaptive clustering
"""

LSTM_UNITS = 92
MINI_BATCH = 10
TRAIN_STEPS_PER_EPOCH = 12000
VALIDATION_STEPS_PER_EPOCH = 800
DATA_DIR = 'D:\\PyProject\\malware_traffic\\3_Packet\\'
CHECKPOINTS_DIR = './checkpoints/'
dict_5class = {0: 'Normal', 1: 'BFSSH', 2: 'Infilt', 3: 'HttpDoS', 4: 'DDoS'}
class_num = 5
PACKET_NUM_PER_SESSION = 14
PACKET_LEN = 100


class ByteBlock(nn.Module):
    """Byte-level 1-D CNN block: stacked Conv1d/Tanh/MaxPool stages, global max
    pooling, and a fully connected projection to 128 features."""

    def __init__(self, in_channels, nb_filter=(64, 100), filter_length=(3, 3),
                 subsample=(2, 1), pool_length=(2, 2)):
        super(ByteBlock, self).__init__()
        layers = []
        for i in range(len(nb_filter)):
            layers.append(nn.Conv1d(in_channels, nb_filter[i], kernel_size=filter_length[i],
                                    padding=0, stride=subsample[i]))
            layers.append(nn.Tanh())
            if pool_length[i]:
                layers.append(nn.MaxPool1d(pool_length[i]))
            in_channels = nb_filter[i]

        self.block = nn.Sequential(*layers)
        self.global_pool = nn.AdaptiveMaxPool1d(1)
        self.fc = nn.Linear(nb_filter[-1], 128)

    def forward(self, x):
        x = self.block(x)
        x = self.global_pool(x).squeeze(dim=2)
        x = torch.nn.functional.relu(self.fc(x))
        return x


def update_confusion_matrix(confusion_matrix, actual_lb, predict_lb):
    for idx, value in enumerate(actual_lb):
        p_value = predict_lb[idx]
        confusion_matrix[value, p_value] += 1
    return confusion_matrix


def truncate(f, n):
    """Truncate f to n decimal places and return it as a string."""
    trunc_f = math.floor(f * 10 ** n) / 10 ** n
    return '{0:.{1}f}'.format(trunc_f, n)


def binarize(x, sz=256):
    return torch.nn.functional.one_hot(x, sz).float()


class OneHotEncodingLayer(nn.Module):
    def __init__(self, sz=256):
        super(OneHotEncodingLayer, self).__init__()
        self.size = sz

    def forward(self, x):
        return torch.nn.functional.one_hot(x, num_classes=self.size).float()


class MyDataset(data.Dataset):
    """Turns a session (list of packets) into a fixed-size byte tensor;
    one-hot encoding of the bytes happens later inside the model."""

    def __init__(self, sessions, labels, indices):
        self.sessions = sessions
        self.labels = labels
        self.indices = indices
        self.packet_num_per_session = PACKET_NUM_PER_SESSION
        self.packet_len = PACKET_LEN

    def __getitem__(self, index):
        idx = self.indices[index]
        # Pad with 0 rather than -1: one_hot() requires non-negative indices.
        X = torch.zeros((self.packet_num_per_session, self.packet_len), dtype=torch.int64)
        for i, packet in enumerate(self.sessions[idx]):
            if i < self.packet_num_per_session:  # keep only the first packet_num_per_session packets
                for j, byte in enumerate(packet[:self.packet_len]):  # keep only the first PACKET_LEN bytes
                    X[i, (self.packet_len - 1 - j)] = byte
        # label = self.labels[idx].astype(np.int64)
        # CrossEntropyLoss handles integer class labels directly
        # y = torch.nn.functional.one_hot(torch.from_numpy(label), num_classes=5)[0]
        y = self.labels[idx][0].astype(np.int64)
        return X, y

    def __len__(self):
        return len(self.indices)
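
# Illustration only (not used by the pipeline): how __getitem__ lays out one
# packet. Bytes are written reversed and right-aligned into a PACKET_LEN-wide
# row, so short packets keep zero padding on the left. The packet is made up.
def _demo_packet_layout():
    demo_packet = [0x16, 0x03, 0x01]                  # hypothetical 3-byte packet
    row = torch.zeros(PACKET_LEN, dtype=torch.int64)  # same padding as MyDataset
    for j, byte in enumerate(demo_packet[:PACKET_LEN]):
        row[PACKET_LEN - 1 - j] = byte                # mirrors the loop in __getitem__
    assert row[PACKET_LEN - 1].item() == 0x16         # first byte ends up in the last column
    return row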
class FEMnet(nn.Module):
    """Feature extraction module (FEM): per-packet CNN encoders, a bidirectional
    GRU over the packet sequence, an attention layer, and a dense classification
    head."""

    def __init__(self, flow_len, packet_len, gru_units):
        super(FEMnet, self).__init__()
        self.packet_len = packet_len
        self.flow_len = flow_len
        self.batch_size = 10  # must match the DataLoader batch size (drop_last=True)
        self.gru_hidden_size = gru_units
        self.rep_dim = gru_units
        self.embedding = OneHotEncodingLayer(sz=256)  # one-hot byte encoding
        self.block2 = ByteBlock(self.packet_len, (128, 256), (5, 5), (1, 1), (2, 2))
        self.block3 = ByteBlock(self.packet_len, (192, 320), (7, 5), (1, 1), (2, 2))
        # Note: dropout has no effect on a single-layer GRU.
        self.lstm_layer = nn.GRU(256, self.gru_hidden_size, dropout=0.1,
                                 bidirectional=True)
        self.dense_layer = nn.Linear(self.gru_hidden_size * 2, 5)  # classification head used by forward()
        # self.output = nn.Softmax(dim=1)  # not needed: CrossEntropyLoss applies softmax itself

    def forward(self, x):
        # x: [batch_size, flow_len, packet_len] = [10, 14, 100]
        embeddings_list = self.embedding(x)  # [10, 14, 100, 256]
        encoder_list = torch.zeros((self.batch_size, self.flow_len, 256), device=x.device)
        for ix, embeddings in enumerate(embeddings_list):  # [14, 100, 256]
            # embeddings = embeddings.permute(0, 2, 1)
            encoder1 = self.block2(embeddings)  # [14, 128]
            encoder2 = self.block3(embeddings)  # [14, 128]
            encoder = torch.cat([encoder1, encoder2], 1)  # [14, 256]
            encoder_list[ix] = encoder
        # RNN input layout: [seq_len, batch_size, input_size]
        encoder_list = encoder_list.permute(1, 0, 2)  # [10, 14, 256] -> [14, 10, 256]
        biLSTM, final_hidden_state = self.lstm_layer(encoder_list)  # [14, 10, 184], [2, 10, 92]
        biLSTM = biLSTM.permute(1, 0, 2)  # [10, 14, 184]
        attn_output, attention = self.attention_net(biLSTM, final_hidden_state)
        dense = self.dense_layer(attn_output)  # [10, 5] class scores
        return dense

    # lstm_output: [batch_size, n_step, gru_hidden_size * num_directions(=2)]
    def attention_net(self, lstm_output, final_state):
        # hidden: [batch_size, gru_hidden_size * num_directions(=2), 1]
        hidden = final_state.view(-1, self.gru_hidden_size * 2, 1)  # -1 is inferred from the other dims
        # attn_weights: [batch_size, n_step]
        attn_weights = torch.bmm(lstm_output, hidden).squeeze(2)  # dot product per time step
        soft_attn_weights = torch.nn.functional.softmax(attn_weights, 1)
        # [batch_size, 2 * gru_hidden_size, n_step] x [batch_size, n_step, 1]
        # = [batch_size, 2 * gru_hidden_size, 1]
        context = torch.bmm(lstm_output.transpose(1, 2), soft_attn_weights.unsqueeze(2)).squeeze(2)
        return context, soft_attn_weights  # context: [batch_size, 2 * gru_hidden_size]


class MyModel(nn.Module):
    """Contrastive clustering wrapper around the FEM encoder: an instance
    projection head and a cluster projection head."""

    def __init__(self, binet, feature_dim, class_n):
        super(MyModel, self).__init__()
        self.binet = binet
        self.feature_dim = feature_dim
        self.cluster_num = class_n
        # Contrastive clustering heads
        self.instance_projector = nn.Sequential(  # instance-level head
            nn.Linear(self.binet.rep_dim, self.binet.rep_dim),
            nn.ReLU(),
            nn.Linear(self.binet.rep_dim, self.feature_dim),
        )
        self.cluster_projector = nn.Sequential(  # cluster-level head
            nn.Linear(self.binet.rep_dim, self.binet.rep_dim),
            nn.ReLU(),
            nn.Linear(self.binet.rep_dim, self.cluster_num),
            nn.Softmax(dim=1)
        )

    # Used during training
    def forward(self, x_i, x_j):
        # x_i, x_j are two views of the same data produced by different pre-processing
        h_i = self.binet(x_i)
        h_j = self.binet(x_j)
        z_i = normalize(self.instance_projector(h_i), dim=1)
        z_j = normalize(self.instance_projector(h_j), dim=1)
        c_i = self.cluster_projector(h_i)
        c_j = self.cluster_projector(h_j)
        return z_i, z_j, c_i, c_j

    # Used during testing
    def forward_cluster(self, x):
        h = self.binet(x)
        c = self.cluster_projector(h)  # cluster assignment distribution
        c = torch.argmax(c, dim=1)  # cluster label for each sample
        return c
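
# Sketch (not called anywhere): a quick shape check for the FEM classifier on a
# synthetic batch of random bytes. The batch size must equal FEMnet.batch_size
# because forward() pre-allocates its per-packet encoding buffer with that size.
def _fem_shape_check():
    fem = FEMnet(PACKET_NUM_PER_SESSION, PACKET_LEN, LSTM_UNITS)
    x = torch.randint(0, 256, (MINI_BATCH, PACKET_NUM_PER_SESSION, PACKET_LEN))  # synthetic byte values
    logits = fem(x)
    assert tuple(logits.shape) == (MINI_BATCH, class_num)  # [10, 5] class scores
    return logits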
def save_result(cf_ma, len_test, epochs=8):
    """
    Compute per-class and overall metrics from the confusion matrix and append
    them to a dated report file.
    :param cf_ma: 5x5 confusion matrix
    :param len_test: number of evaluated samples
    :param epochs: number of training epochs (recorded in the report header)
    :return:
    """
    metrics_list = []
    for i in range(5):
        if i == 0:
            metrics_list.append(
                [dict_5class[i], str(i), str(cf_ma[i, 0]), str(cf_ma[i, 1]), str(cf_ma[i, 2]),
                 str(cf_ma[i, 3]), str(cf_ma[i, 4]), '--', '--', '--'])
        else:
            acc = truncate((float(len_test - cf_ma[:, i].sum() - cf_ma[i, :].sum() + cf_ma[i, i] * 2)
                            / len_test) * 100, 2)
            tpr = truncate((float(cf_ma[i, i]) / cf_ma[i].sum()) * 100, 2)
            fpr = truncate((float(cf_ma[0, i]) / cf_ma[0].sum()) * 100, 2)
            metrics_list.append(
                [dict_5class[i], str(i), str(cf_ma[i, 0]), str(cf_ma[i, 1]), str(cf_ma[i, 2]),
                 str(cf_ma[i, 3]), str(cf_ma[i, 4]), str(acc), str(tpr), str(fpr)])
    overall_acc = truncate(
        (float(cf_ma[0, 0] + cf_ma[1, 1] + cf_ma[2, 2] + cf_ma[3, 3] + cf_ma[4, 4]) / len_test) * 100, 2)
    overall_tpr = truncate((float(cf_ma[1, 1] + cf_ma[2, 2] + cf_ma[3, 3] + cf_ma[4, 4]) / cf_ma[1:].sum()) * 100, 2)
    overall_fpr = truncate((float(cf_ma[0, 1:].sum()) / cf_ma[0, :].sum()) * 100, 2)

    # Build the report file name from the current date
    current_date = time.strftime("%Y%m%d", time.localtime())
    file_name = f"evaluate_{current_date}.txt"
    with open(file_name, 'a') as f:
        f.write("\n")
        t = time.strftime('%Y-%m-%d %X', time.localtime())
        f.write(t + "\n")
        f.write('CLASS_NUM: 5\n')
        f.write('PACKET_LEN: ' + str(PACKET_LEN) + "\n")
        f.write('PACKET_NUM_PER_SESSION: ' + str(PACKET_NUM_PER_SESSION) + "\n")
        f.write('MINI_BATCH: ' + str(MINI_BATCH) + "\n")
        f.write('TRAIN_EPOCHS: ' + str(epochs) + "\n")
        f.write('DATA_DIR: ' + DATA_DIR + "\n")
        f.write("label\tindex\t0\t1\t2\t3\t4\tACC\tTPR\tFPR\n")
        for metrics in metrics_list:
            f.write('\t'.join(metrics) + "\n")
        f.write('Overall accuracy: ' + str(overall_acc) + "\n")
        f.write('Overall TPR: ' + str(overall_tpr) + "\n")
        f.write('Overall FPR: ' + str(overall_fpr) + "\n")
        # f.write('Train time(second): ' + str(int(train_time)) + "\n")
        # f.write('Test time(second): ' + str(int(test_time)) + "\n\n")
        f.write("\n\n")


def train(model, dataloader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    num = 0
    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()  # reset gradients
        outputs = model(inputs)
        num += 1
        # print("{}/{} outputs & label shape: ".format(num, len(dataloader)), outputs.shape, labels.shape)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
    return running_loss / len(dataloader.dataset)


def evaluate(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    cf_ma = np.zeros((5, 5), dtype=int)
    num = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            predicted_labels = torch.argmax(outputs, dim=1).cpu().numpy()
            true_labels = labels.cpu().numpy()
            cf_ma = update_confusion_matrix(cf_ma, true_labels, predicted_labels)
            num += predicted_labels.shape[0]
    print("evaluated samples: ", num)
    save_result(cf_ma, num)
    return running_loss / len(dataloader.dataset), cf_ma


def load_data():
    """
    Load the pickled ISCX2012 sessions and labels and build train/test index sets.
    :return: sessions, labels, train_indices, test_indices
    """
    t1 = timer()
    sessions = []
    labels = []
    num_pkls = len(glob.glob(DATA_DIR + 'ISCX2012_labels_*.pkl'))  # count matching label files
    for i in range(num_pkls):
        # if i != 1:
        #     continue
        session_pkl = DATA_DIR + 'ISCX2012_pcaps_' + str(i) + '.pkl'
        session_lists = pickle.load(open(session_pkl, 'rb'))  # deserialize
        sessions.extend(session_lists.values.tolist())
        label_pkl = DATA_DIR + 'ISCX2012_labels_' + str(i) + '.pkl'
        label_lists = pickle.load(open(label_pkl, 'rb'))
        labels.extend(label_lists.values.tolist())
        print(i)
    t2 = timer()
    print("load data time: ", t2 - t1)

    labels = np.array(labels)
    normal_indices = np.where(labels == 0)[0]  # row indices of normal flows (array)
    # The full set of normal flows is too large to train on comfortably. The line
    # below keeps 100,000 of them (better: cap the number of normal flows during
    # preprocessing to save memory).
    normal_indices = np.random.choice(normal_indices, 100000, replace=False)  # optional subsampling
    attack_indices = [np.where(labels == i)[0] for i in range(1, 5)]  # row indices of labels 1-4 (list of arrays)
    # np.random.choice samples with replacement by default; pass replace=False to avoid duplicates.
    test_normal_indices = np.random.choice(normal_indices, int(len(normal_indices) * 0.4), replace=False)
    test_attack_indices = np.concatenate(  # combine the per-class attack test indices
        [np.random.choice(attack_indices[i], int(len(attack_indices[i]) * 0.4), replace=False) for i in range(4)])
    test_indices = np.concatenate([test_normal_indices, test_attack_indices]).astype(int)
    # train_indices = np.array(list(set(np.arange(len(labels))) - set(test_indices)))
    attack_indices = np.concatenate(attack_indices).astype(int)
    indices = np.concatenate([normal_indices, attack_indices]).astype(int)
    train_indices = np.array(list(set(indices) - set(test_indices)))
    return sessions, labels, train_indices, test_indices
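
# Worked example (toy numbers, illustration only): the per-class metrics in
# save_result() are one-vs-rest. For a hypothetical 2-class confusion matrix,
# accuracy is (TP + TN) / total, TPR is the class recall, and FPR is the share
# of normal traffic mislabelled as that class.
def _metrics_demo():
    cf = np.array([[90, 10],
                   [5, 95]])
    total = cf.sum()                                                        # 200
    i = 1
    acc = (total - cf[:, i].sum() - cf[i, :].sum() + 2 * cf[i, i]) / total  # (95 + 90) / 200 = 0.925
    tpr = cf[i, i] / cf[i].sum()                                            # 95 / 100 = 0.95
    fpr = cf[0, i] / cf[0].sum()                                            # 10 / 100 = 0.10
    return acc, tpr, fpr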
# Hyperparameters
NUM_EPOCHS = 8
LEARNING_RATE = 0.001

# Build the model
fe_model = FEMnet(PACKET_NUM_PER_SESSION, PACKET_LEN, LSTM_UNITS)
model = MyModel(fe_model, 128, class_num)
# model = PLA_Attention_Model(100, 100, 50, 100, 14)

# Move the model to the GPU if one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Build the training and validation data loaders
sessions, labels, train_indices, test_indices = load_data()
train_dataset = MyDataset(sessions, labels, train_indices)
# num_workers=2 is not used: Windows does not allow multiple worker processes to load this data
train_dataloader = data.DataLoader(train_dataset, batch_size=MINI_BATCH, shuffle=True, drop_last=True)
val_dataset = MyDataset(sessions, labels, test_indices)
# drop_last=True discards incomplete batches, which would otherwise raise an error
val_dataloader = data.DataLoader(val_dataset, batch_size=MINI_BATCH, shuffle=False, drop_last=True)

# Training loop. MyModel.forward expects two augmented views (the contrastive
# path), so the supervised baseline below trains and evaluates the FEM
# classifier head (model.binet) directly.
start_time = timer()
for epoch in range(NUM_EPOCHS):
    # Train for one epoch
    train_loss = train(model.binet, train_dataloader, criterion, optimizer, device)
    # Evaluate on the validation set
    val_loss, confusion_matrix = evaluate(model.binet, val_dataloader, criterion, device)
    # Report the training and validation losses and the confusion matrix
    print(f"Epoch {epoch + 1}/{NUM_EPOCHS}")
    print(f"Train Loss: {truncate(train_loss, 4)}")
    print(f"Validation Loss: {truncate(val_loss, 4)}")
    print("Confusion Matrix:")
    for i in range(5):
        for j in range(5):
            print(confusion_matrix[i][j], end="\t")
        print()
    print("---------------------------------")
end_time = timer()
training_time = end_time - start_time
print(f"Training Time: {truncate(training_time, 2)} seconds")
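
# CHECKPOINTS_DIR is defined above but never used; the lines below are a minimal
# sketch (an assumption, not the author's original persistence logic) for saving
# the trained weights. The file name "femnet_cc.pt" is made up.
import os

os.makedirs(CHECKPOINTS_DIR, exist_ok=True)
torch.save(model.state_dict(), os.path.join(CHECKPOINTS_DIR, "femnet_cc.pt"))
# To restore later:
# model.load_state_dict(torch.load(os.path.join(CHECKPOINTS_DIR, "femnet_cc.pt"), map_location=device))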