Graduation_Project/LHL/utils/visualize.ipynb

759 lines
724 KiB
Plaintext
Raw Normal View History

2024-06-25 11:50:04 +08:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# set up Python environment: numpy for numerical routines, and matplotlib for plotting\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import pylab\n",
"from skimage import transform\n",
"# display plots in this notebook\n",
"%matplotlib inline\n",
"\n",
"import os\n",
"\n",
"# Display defaults applied to every figure created later in this notebook.\n",
"plt.rcParams.update({\n",
"    'figure.figsize': (12, 9),          # default figure size (width, height) in inches\n",
"    'image.interpolation': 'nearest',   # don't interpolate: show square pixels\n",
"    'image.cmap': 'gray',               # grayscale default rather than a (potentially misleading) color heatmap\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"data_path = '../evaluation'\n",
"\n",
"\n",
"def load_vocab(vocab_path, first_entry):\n",
"    \"\"\"Read a vocabulary file into a list of labels.\n",
"\n",
"    Args:\n",
"        vocab_path: path of a text file with one entry per line.\n",
"        first_entry: label stored at index 0, ahead of the file's entries.\n",
"\n",
"    Returns:\n",
"        A list starting with `first_entry`, followed by one label per line of the\n",
"        file: the text before the first comma, lower-cased and stripped.\n",
"    \"\"\"\n",
"    labels = [first_entry]\n",
"    with open(vocab_path) as vocab_file:\n",
"        # Every line is kept (even an empty one) so that list positions stay\n",
"        # aligned with the line numbers of the vocabulary file.\n",
"        for line in vocab_file:\n",
"            labels.append(line.split(',')[0].lower().strip())\n",
"    return labels\n",
"\n",
"\n",
"# Index 0 holds a placeholder so that the visualisation cell can look labels up\n",
"# with `classes[k + 1]` / `attributes[k + 1]`.\n",
"classes = load_vocab(os.path.join(data_path, 'objects_vocab.txt'), '__background__')\n",
"attributes = load_vocab(os.path.join(data_path, 'attributes_vocab.txt'), '__no_attribute__')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import argparse\n",
"import os\n",
"import sys\n",
"import torch\n",
"import tqdm\n",
"import cv2\n",
"import numpy as np\n",
"# Make the bundled detectron2 checkout and the project root importable.\n",
"for extra_path in ('../detectron2', '../'):\n",
"    # Guard so that re-running this cell in the same kernel does not pile up duplicates.\n",
"    if extra_path not in sys.path:\n",
"        sys.path.append(extra_path)\n",
"\n",
"import detectron2.utils.comm as comm\n",
"from detectron2.checkpoint import DetectionCheckpointer\n",
"from detectron2.data import build_detection_test_loader, build_detection_train_loader\n",
"from detectron2.config import get_cfg\n",
"from detectron2.engine import DefaultTrainer, default_setup, launch\n",
"from detectron2.evaluation import COCOEvaluator, verify_results\n",
"\n",
"from utils import mkdir, save_features\n",
"from extract_utils import get_image_blob\n",
"from models.bua import add_bottom_up_attention_config\n",
"from models.bua.layers.nms import nms"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Config '../configs/bua-caffe/extract-bua-caffe-r101.yaml' has no VERSION. Assuming it to be compatible with latest v2.\n"
]
}
],
"source": [
"# YAML config used for feature extraction; the path is relative to the notebook's working directory.\n",
"config_file = '../configs/bua-caffe/extract-bua-caffe-r101.yaml'\n",
"\n",
"# Start from detectron2's config object, let the project helper extend it, then overlay the YAML file.\n",
"# NOTE(review): add_bottom_up_attention_config lives in models.bua and is not visible here --\n",
"# presumably it registers the project-specific options; confirm what the second argument (True) selects.\n",
"cfg = get_cfg()\n",
"add_bottom_up_attention_config(cfg, True)\n",
"cfg.merge_from_file(config_file)\n",
"# Lock the config so later cells cannot change it by accident.\n",
"cfg.freeze()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"GeneralizedBUARCNN(\n",
" (backbone): ResNet(\n",
" (stem): BUABasicStem(\n",
" (conv1): Conv2d(\n",
" 3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)\n",
" )\n",
" )\n",
" (res2): Sequential(\n",
" (0): BottleneckBlock(\n",
" (shortcut): Conv2d(\n",
" 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)\n",
" )\n",
" (conv1): Conv2d(\n",
" 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)\n",
" )\n",
" (conv2): Conv2d(\n",
" 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)\n",
" )\n",
" (conv3): Conv2d(\n",
" 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)\n",
" )\n",
" )\n",
" (1): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)\n",
" )\n",
" (conv2): Conv2d(\n",
" 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)\n",
" )\n",
" (conv3): Conv2d(\n",
" 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)\n",
" )\n",
" )\n",
" (2): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)\n",
" )\n",
" (conv2): Conv2d(\n",
" 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)\n",
" )\n",
" (conv3): Conv2d(\n",
" 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)\n",
" )\n",
" )\n",
" )\n",
" (res3): Sequential(\n",
" (0): BottleneckBlock(\n",
" (shortcut): Conv2d(\n",
" 256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)\n",
" )\n",
" (conv1): Conv2d(\n",
" 256, 128, kernel_size=(1, 1), stride=(2, 2), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)\n",
" )\n",
" (conv2): Conv2d(\n",
" 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)\n",
" )\n",
" (conv3): Conv2d(\n",
" 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)\n",
" )\n",
" )\n",
" (1): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)\n",
" )\n",
" (conv2): Conv2d(\n",
" 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)\n",
" )\n",
" (conv3): Conv2d(\n",
" 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)\n",
" )\n",
" )\n",
" (2): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)\n",
" )\n",
" (conv2): Conv2d(\n",
" 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)\n",
" )\n",
" (conv3): Conv2d(\n",
" 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)\n",
" )\n",
" )\n",
" (3): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)\n",
" )\n",
" (conv2): Conv2d(\n",
" 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)\n",
" )\n",
" (conv3): Conv2d(\n",
" 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)\n",
" )\n",
" )\n",
" )\n",
" (res4): Sequential(\n",
" (0): BottleneckBlock(\n",
" (shortcut): Conv2d(\n",
" 512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv1): Conv2d(\n",
" 512, 256, kernel_size=(1, 1), stride=(2, 2), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (1): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (2): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (3): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (4): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (5): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (6): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (7): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (8): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (9): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (10): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (11): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (12): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (13): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (14): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (15): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (16): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (17): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (18): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (19): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (20): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (21): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (22): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False\n",
" (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" )\n",
" )\n",
" (proposal_generator): BUARPN(\n",
" (anchor_generator): DefaultAnchorGenerator(\n",
" (cell_anchors): BufferList()\n",
" )\n",
" (rpn_head): StandardBUARPNHead(\n",
" (conv): Conv2d(1024, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (objectness_logits): Conv2d(512, 24, kernel_size=(1, 1), stride=(1, 1))\n",
" (anchor_deltas): Conv2d(512, 48, kernel_size=(1, 1), stride=(1, 1))\n",
" )\n",
" )\n",
" (roi_heads): BUACaffeRes5ROIHeads(\n",
" (pooler): ROIPooler(\n",
" (level_poolers): ModuleList(\n",
" (0): RoIPool(output_size=(14, 14), spatial_scale=0.0625)\n",
" )\n",
" )\n",
" (res5): Sequential(\n",
" (0): BottleneckBlock(\n",
" (shortcut): Conv2d(\n",
" 1024, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv1): Conv2d(\n",
" 1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False\n",
" (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (1): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False\n",
" (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (2): BottleneckBlock(\n",
" (conv1): Conv2d(\n",
" 2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv2): Conv2d(\n",
" 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), bias=False\n",
" (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" (conv3): Conv2d(\n",
" 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False\n",
" (norm): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" )\n",
" (box_predictor): BUACaffeFastRCNNOutputLayers(\n",
" (cls_score): Linear(in_features=2048, out_features=1601, bias=True)\n",
" (bbox_pred): Linear(in_features=2048, out_features=6404, bias=True)\n",
" (cls_embed): Embedding(1601, 256)\n",
" (attr_linear1): Linear(in_features=2304, out_features=512, bias=True)\n",
" (attr_linear2): Linear(in_features=512, out_features=401, bias=True)\n",
" )\n",
" )\n",
")"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"MIN_BOXES = 10      # lower bound on the number of boxes kept by the visualisation cell\n",
"MAX_BOXES = 20      # upper bound on the number of boxes kept by the visualisation cell\n",
"CONF_THRESH = 0.4   # minimum post-NMS class score for a box to be kept\n",
"\n",
"model = DefaultTrainer.build_model(cfg)\n",
"# resume=False: always load the weights named in the config. With resume=True the\n",
"# checkpointer prefers a 'last_checkpoint' recorded in cfg.OUTPUT_DIR (if one exists)\n",
"# and silently ignores the path given here, so a different model could be visualised.\n",
"DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(\n",
"    '../' + cfg.MODEL.WEIGHTS, resume=False\n",
")\n",
"# Switch to inference mode; the trailing semicolon keeps the very long module repr\n",
"# out of the cell output.\n",
"model.eval();"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"boxes=20\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAApoAAAIACAYAAAAi8czaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nOy9Z6BdV33m/dv19Hp7lXTVm5vkim1cMM0UG2MDjukQIJmQwvsOmWQSYF5IZpLAm4SQIRBaCiUJGGOMjTGWccWyZNmSrmTp9l5PL/vsPh/W1pETjCETCJDs54t0zt1n79X2Ws96/mVJvu8TIkSIECFChAgRIsRPGvLPugAhQoQIESJEiBAh/mMiJJohQoQIESJEiBAhfipQn+tLSdryPyA7/O9dmJ8/lGd9f/z3f9alCBEiRIgQIUKE+EXEcxJNQTIPTf+7luTnEvs3/qxLECJEiBAhQoQI8YuK0HQeIkSIECFChAgR4qeCkGiGCBEiRIgQIUKE+KngF4ZofvrTXNLRgfazLkeIECFChAgRIkSIHw8/UaIZifz0iOtNN3FJLvfcRFPTkH5azw0RIkSIECFChAjxf4cfmxh+6UtcubLCfzl5kjc+/jg3fe5zXAZw+jRvuf12rh0b4y3/+39z8bZtxA8f5paZGd45M8M73/xmhgC6utAefJBXz8zwzvl53vXbv812gD/+Y8578kleNzrKbWtr/No3vsF1//LZf/VXXJxKkXrwQd586hRvBmi1+J1//Eeunp7mHb/0Swy99rX0jY3xltlZfnl0lNv27iUJcMUV5EZHuW12ll8eH+etL3oRnQAf+AC7lpb4lYUF3j02xlv/7U0ZIkSIECFChAgR4tn4IVHn/xy33EL/FVewa8cOPhmJIB87xrtOnGDpzN9TKaJbt/J5gO9/n5s+/nG+//nPM3vhhWTuvJPbvvAFPvGpT3Hlww8zdeWV3DE8TPTQId75mc8wCTA8TO955/HJWg13aor/sm8fjx8+TPXM/d/1Lh6/+WYuvfJKvjA+ThMgEkEbHWX15ps5EIshj47y1pe+lC+dPk3zQx9i9yc+wbVXXskdn/40r3z3u/nmAw9QvPVWBj7xCa7fvp0vvOtdvPBlL+Nvn3qK2vAw0Z9oq4YIESJEiBAhQoT48Yjmddcx/OijPFMq4QAcPMjpZ//9y1/m+Jn/797NyIc/TNeHPyw+R6NEenvR9+1j8yWXsP1NbxJKqKahnn8+GYDjx5mcn8cEWFxkbe9ess8mms8F18X/yEc4AXD11XT29dF9//28CUCWkcpl6t3d6CMjDP3d33FLu8IqCsAzzzD3pS9xw7e/zejHP87JH6cdQoQIESJEiBAhQvz4+LGIpvQjPCCrVexnXSvt3ctfnyGlz8Ztt/GV736XwrO/u+46Bi0L98xnz8PX9R9t0nccHNvGB5BlWF5mddMmPvPsa/r7iRgGrcFBPvkvf3/NNXzz1lsZuPFGth06xLv37+eTExMYP+q5IUKECBEiRIgQIX48/Fg+mt/5DrOXXsr2TAa1uxt9/362/rBrjx9n4mMf46Izn2+4gV6Aw4eZeP/7ufgMab3xRvH9j4tWC7O7G/25/nbgAIVkksRttzEIEIshv+xldC0uYhYKlD/wAXaBIMyvfjU9IHw3v/hFFm6+mQONBs29e4W6GiJEiBAhQoQIEeIngx+LaH7lKyw+9hinTp/m3Q88wOsmJ1msVGg917W33cbdu3fTv7DAe5aX+dX3vpf9AG97G99TVeSFBd6ztMSvfOhDXPOvKeg3vsHh22/ntjPBQM9Go4H7znfyDx/4ANctLPDuyUne/ZKXiCCk227jq7fcwgULC7x7aYlffcMb2AHwZ3/GixcXRVmOH2fmjjtY/teUJ0SIECFChAgRIsTzQ/J9/we/lPZ//l8eQdndjb66itXRgXbkCG/9zd/kzq9+9WxA0H9M7N/o+4fe8rMuRY
gQIUKECBEixC8intdHM9v5sQ+0LBOAj3/i1ezc2UUkovC5vx3lOw8+/supLh9JkpAlBYBWq4UkSUjSWaFUliUczwPA8zwUScZzxWdJkvA8D9/10DRRFA8fWZFR9bNFs1wbXRMpNC3bRvIB30eRxDUSMq7nMLJ5AwArq8v4SNQadQBczyMaiSBJglT7rkc6nSYSieKLopDOZllaWcEPrrFsCxWP4Uv73wzgmg6u6eM6wvU0GtVxXZuBoWHKNRG31Gi2iEZ1tMDAX60V6evvplIri/axHLp7ezAM4QqaSWdZXlwhlUjQqAqBuNFo4FguUuBjEIvFSKUTIAcbAtnHdEx6+rqIxSIAGLZBoVhEUtR2P+zevRvbFGWdn1tk8+bNjI2fCspZ5oILLkByfJ45OSnuUQPJk0gnYgA4roXtuXR0dACQTGSYmJgikxceBtFUFCUBsupiGxYAuXQPm/vO5yO/878AuPu7d/LNu25nvbAAwMTsJNt2n8PcyiyxqHDL1TWFbRv2YFmirMdOHsHDYXV1VYzBRJqrLn0Bf/uZLwOQ0LK4lkw8EsNzxD1UWaLWLKEmRRPZsstNr38TsiaSCZTLdd7/X/8bblN09gMP3M/To09A1CHTK+p7/NTTFEpFMqkUAHu2beauO+7mDz7yUQD+4ct30ts9wi033yrucf+9yKrJwuoYji/67qHHHmbjxhE8Q/RVYaVIZ2cnui4GxIPfe4xoTGHP3p0cOSri5zK5BNl8mtlFsWe75XUvZnL2BN19XQCUinXOP/9CKmUxlu+77z62bhuhUl1jaLgbAEX1sB2DRl28q/Wazcp8mWQsD8DI8FampqbIZkTdfN9D0xV6+3uYnZ0FYGFphf37LyCZTgBw5KknicUixONx0aamRb4jixqMQ8dxaDZsXFtlYX4dANO06e7O4yuina+96mru/e69FEsrAHR2ddNs+PT2D3P11VcD8K177iSXi6FqUnAPg0hEo1GrAbB5ZDvTE/PkcmLcuV6LpeU5jHqDjZtHAMj3dGKYLeIJMQCMZoNMOk6j2gBgealAV08P5ZoYU0aryvnn7cHzPBaXRNnT2QyW2aRliXdTlmU8z8N2xRhzPB9djxKPJDFb4rtKoUwqGicZF21mtZqoEZ3p+TlRru5OPFfCD9zVo3GXaEwhm04hy2JMSH6MmclFNm0YAiCRiFGvVYjHsgAU1zze/Mb3sDAtynn/gftIp3SS8QyX7r9WlK2hcuqpCd7wmtcBkNQlXMeEYB6+655vs1Io88bbhDEoIhPMobQhBy5N04uir1ZbZZatApYk2uPOr3+Fy/ZfwkB2mD079gLwzNgJRp86yq+89d0AzE3P0dU7QCIq3ikF8PGRPfEgz/NQFRXf85DkYI2QPIRhzeOH4xfmXJEQIf6z4zkjen5kMNALWmKS/Os33o6iKpxRQC/xz8xUUpsUOY6DhPTPoockCbzgN77vIyNxVkUN/u/7yK54jo+PJEvIVjs+CNfzUILFy3U9URPfR5ICwoqE53vkFsRkPNxo4QOWLe7h+T6qZZ8lmp5PxG6gqiZnihKpmQzWG22i6bouVQxuccWC57kevuPjBaRZVS183yPVWuYMGbdtB1VVkAXvxjRNUrUiLUsQEcf1SJTWcWxR10jEpF6rE9EtrIAU2paF5/rt7tJUD71gne0+ycf1XBLrHpoqus/2bEFeg4XFcRy6q9O4jihrtVojvzpBoVgJymnSZ0yD57O+Lr5zTMCXiOhicfY8F8/3ia0KEqlrJUqlMpF1URdVV5F1kGQPN2jnWKRBLtUkan0IgD2Tp9DHnqHZFES8VCnR0ThJpV5BU0U7K4pEx6SDG2w+XrC+ho9HoyGeE9V8NprHGK4E7aEY+K6Eptj4QV/IkoRp28iiqHiSz87vPfks4m3R/ycfx7PFM18wPcWW1UVQPSJJcc2FhRUMwyASEYSue8pkx7TFeZ/9pnju8dMkE2tsWWmK+k9NIMku1UYBLyAS++cMsm
uL+EEYXLNuEF+wUBQxIF5YBFX16GaF5dVg3FUtoqs1KmKYsfvecUqVComkuEnLsOidPIrZsoI2tcivrmCaTdKnRX1
"text/plain": [
"<Figure size 864x648 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"im_file = '../datasets/demo/004545.jpg'\n",
"NMS_THRESH = 0.3    # threshold handed to nms() for every class (presumably IoU overlap -- TODO confirm)\n",
"attr_thresh = 0.1   # minimum attribute score for the attribute to be prefixed to the label\n",
"\n",
"im = cv2.imread(im_file)\n",
"# cv2.imread reports an unreadable/missing file by returning None instead of raising.\n",
"if im is None:\n",
"    raise FileNotFoundError('Could not read image: {}'.format(im_file))\n",
"dataset_dict = get_image_blob(im, cfg.MODEL.PIXEL_MEAN)\n",
"\n",
"# Inference only, so autograd bookkeeping is switched off.\n",
"with torch.no_grad():\n",
"    raw_boxes, raw_scores, features_pooled, raw_attr_scores = model([dataset_dict])\n",
"\n",
"# One image was passed in, so take element 0 of every output and move it to the CPU.\n",
"# Dividing by im_scale maps the boxes back to the coordinates of the image as loaded.\n",
"dets = raw_boxes[0].tensor.cpu() / dataset_dict['im_scale']\n",
"scores = raw_scores[0].cpu()\n",
"feats = features_pooled[0].cpu()\n",
"attr_scores = raw_attr_scores[0].cpu()\n",
"\n",
"# For every box, remember its best score over the classes in which it survives NMS.\n",
"# Column 0 is skipped: it lines up with the '__background__' entry of `classes`.\n",
"max_conf = torch.zeros((scores.shape[0])).to(scores.device)\n",
"for cls_ind in range(1, scores.shape[1]):\n",
"    cls_scores = scores[:, cls_ind]\n",
"    keep = nms(dets, cls_scores, NMS_THRESH)\n",
"    max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep],\n",
"                                 cls_scores[keep],\n",
"                                 max_conf[keep])\n",
"\n",
"# Keep the boxes above the confidence threshold, but never fewer than MIN_BOXES\n",
"# nor more than MAX_BOXES (falling back to the highest-scoring ones).\n",
"keep_boxes = torch.nonzero(max_conf >= CONF_THRESH).flatten()\n",
"if len(keep_boxes) < MIN_BOXES:\n",
"    keep_boxes = torch.argsort(max_conf, descending=True)[:MIN_BOXES]\n",
"elif len(keep_boxes) > MAX_BOXES:\n",
"    keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES]\n",
"\n",
"# OpenCV loads images as BGR; matplotlib expects RGB.\n",
"im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)\n",
"fig, ax = plt.subplots()\n",
"ax.axis('off')\n",
"ax.imshow(im)\n",
"\n",
"boxes = dets[keep_boxes].numpy()\n",
"objects = np.argmax(scores[keep_boxes].numpy()[:, 1:], axis=1)\n",
"kept_attr_scores = attr_scores[keep_boxes].numpy()[:, 1:]\n",
"attr = np.argmax(kept_attr_scores, axis=1)\n",
"attr_conf = np.max(kept_attr_scores, axis=1)\n",
"\n",
"for bbox, obj_idx, attr_idx, conf in zip(boxes, objects, attr, attr_conf):\n",
"    # Move boxes that start exactly on the left/top image border one pixel inward\n",
"    # (bbox is a view, so this also updates `boxes`).\n",
"    if bbox[0] == 0:\n",
"        bbox[0] = 1\n",
"    if bbox[1] == 0:\n",
"        bbox[1] = 1\n",
"    # The +1 undoes the [:, 1:] slice used for the argmax above.\n",
"    label = classes[obj_idx + 1]\n",
"    if conf > attr_thresh:\n",
"        label = attributes[attr_idx + 1] + ' ' + label\n",
"    ax.add_patch(\n",
"        plt.Rectangle((bbox[0], bbox[1]),\n",
"                      bbox[2] - bbox[0],\n",
"                      bbox[3] - bbox[1], fill=False,\n",
"                      edgecolor='red', linewidth=2, alpha=0.5)\n",
"    )\n",
"    ax.text(bbox[0], bbox[1] - 2,\n",
"            label,\n",
"            bbox=dict(facecolor='blue', alpha=0.5),\n",
"            fontsize=10, color='white')\n",
"print('boxes={}'.format(len(keep_boxes)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}