142 lines
3.8 KiB
Python
142 lines
3.8 KiB
Python
import json
|
|
from tqdm import tqdm
|
|
import os
|
|
from collections import defaultdict
|
|
from glob import glob
|
|
import random
|
|
|
|
|
|
|
|
|
|
|
|
def create_sbu_json(annot_path, output_dir):
    """Convert an SBU annotation file into ``sbu.json`` inside *output_dir*.

    The annotation file is loaded as JSON and iterated; each entry is indexed
    as ``entry[0]`` (used as the image path) and ``entry[1]`` (used as the
    caption). Every occurrence of the substring ``'images'`` in the image
    path is replaced with ``'images_train'``.

    Args:
        annot_path: Path of the JSON annotation file to read.
        output_dir: Directory that receives ``sbu.json``; it is created when
            it does not exist. An empty string means the current directory.
    """
    # os.makedirs('') raises, so only create the directory when one is named.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Read through a context manager so the file handle is closed right away
    # instead of being left for the garbage collector.
    with open(annot_path, 'r') as fp:
        data = json.load(fp)

    new_data = [
        {'image': d[0].replace('images', 'images_train'), 'caption': d[1]}
        for d in tqdm(data)
    ]

    out_path = os.path.join(output_dir, 'sbu.json')
    with open(out_path, 'w') as file:
        json.dump(new_data, file)
|
|
|
|
|
|
|
|
def to_dict_coco(path, iid2captions, iid2split, iid2id):
    """Build the caption records for a single COCO image file.

    Args:
        path: Path of the image file; its final component (the file name) is
            the key used for all three lookups.
        iid2captions: Mapping from file name to an iterable of captions.
        iid2split: Mapping from file name to the split label of the image.
        iid2id: Mapping from file name to the image id.

    Returns:
        A ``(split, records)`` tuple where ``records`` holds one
        ``{'image': path, 'caption': ..., 'image_id': ...}`` dict per caption.

    Raises:
        KeyError: If the file name is missing from a mapping that does not
            supply defaults.
    """
    # basename() understands the platform's separators, whereas splitting on
    # "/" breaks for the backslash-joined paths glob produces on Windows.
    name = os.path.basename(path)

    # Lookup order is kept: captions first, then split, then id.
    captions = iid2captions[name]
    split = iid2split[name]
    id_ = iid2id[name]

    di = [{'image': path, 'caption': c, 'image_id': id_} for c in captions]
    return split, di
|
|
|
|
def create_coco_json(data_dir, output_dir, split=['train', 'val'], output_file='coco.json'):
    """Write a flat image/caption JSON file from the Karpathy COCO annotations.

    Reads ``{data_dir}/karpathy/dataset_coco.json`` and, for every entry under
    its ``"images"`` key, records the ``cocoid``, the ``split`` label and the
    ``raw`` text of each sentence, keyed by ``filename``. The ``*.jpg`` files
    under ``{data_dir}/train2014`` and ``{data_dir}/val2014`` are then listed
    and shuffled; files whose name has caption annotations and whose split
    label is in *split* contribute one record per caption. Summary counts are
    printed along the way.

    Args:
        data_dir: Root directory holding ``karpathy/``, ``train2014/`` and
            ``val2014/``.
        output_dir: Directory that receives the output file; it is created
            when it does not exist. An empty string means the current
            directory.
        split: Split labels to keep. The default list is only read, never
            mutated, so sharing it between calls is harmless.
        output_file: Name of the JSON file written inside *output_dir*.
    """
    with open(f"{data_dir}/karpathy/dataset_coco.json", "r") as fp:
        captions = json.load(fp)

    captions = captions["images"]

    iid2captions = defaultdict(list)
    iid2split = dict()
    iid2id = dict()

    for cap in tqdm(captions):
        filename = cap["filename"]
        iid2id[filename] = cap['cocoid']
        iid2split[filename] = cap["split"]
        # Append one by one so an image without sentences creates no
        # iid2captions entry (it must not count as annotated below).
        for c in cap["sentences"]:
            iid2captions[filename].append(c["raw"])

    paths = glob(f"{data_dir}/train2014/*.jpg") + glob(f"{data_dir}/val2014/*.jpg")
    # glob joins matches with the platform separator; normalise to "/" so the
    # file-name extraction works everywhere (this is a no-op on POSIX).
    paths = [p.replace(os.sep, "/") for p in paths]
    random.shuffle(paths)
    caption_paths = [path for path in tqdm(paths) if os.path.basename(path) in iid2captions]

    if len(paths) == len(caption_paths):
        print("all images have caption annotations")
    else:
        print("not all images have caption annotations")
    print(
        len(paths), len(caption_paths), len(iid2captions),
    )

    new_data = []
    num = 0
    other_splits = set()
    for path in tqdm(caption_paths):
        s, di = to_dict_coco(path, iid2captions, iid2split, iid2id)
        if s in split:
            num += 1
            new_data.extend(di)
        else:
            # Remember the skipped split labels for the summary line.
            other_splits.add(s)
    print(split, num, 'images', ', other_splits:', other_splits)

    # Create the target directory like create_sbu_json does, so writing does
    # not fail on a fresh output location; os.makedirs('') would raise.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    out_path = os.path.join(output_dir, output_file)
    with open(out_path, 'w') as file:
        json.dump(new_data, file)
|
|
|
|
|
|
|
|
|
|
|
|
def to_dict_vg(path, iid2captions):
    """Build the caption records for a single Visual Genome image file.

    The image id is the integer value of the file name without its extension.

    Args:
        path: Path of the image file, e.g. ``.../VG_100K/12.jpg``.
        iid2captions: Mapping from integer image id to an iterable of dicts,
            each providing a ``"phrase"`` entry.

    Returns:
        A list with one ``{'image': path, 'caption': phrase}`` dict per entry
        found for the image id.

    Raises:
        ValueError: If the file name stem is not an integer.
    """
    # basename()/splitext() handle the platform's separators and extensions of
    # any length, unlike splitting on "/" and dropping the last 4 characters.
    name = os.path.basename(path)
    iid = int(os.path.splitext(name)[0])

    cdicts = iid2captions[iid]
    return [{'image': path, 'caption': c["phrase"]} for c in cdicts]
|
|
|
|
def create_vg_json(data_dir, output_dir, split=['train', 'val']):
    """Write ``vg.json``, a flat image/caption file for Visual Genome regions.

    Reads ``{data_dir}/annotations/region_descriptions.json``, groups every
    region dict by its ``"image_id"``, lists and shuffles the ``*.jpg`` files
    under ``{data_dir}/images/VG_100K`` and ``{data_dir}/images/VG_100K_2``,
    and emits one record per region for each file whose numeric name has
    annotations. Summary counts are printed along the way.

    Args:
        data_dir: Root directory holding ``annotations/`` and ``images/``.
        output_dir: Directory that receives ``vg.json``; it is created when it
            does not exist. An empty string means the current directory.
        split: Unused by this function; kept so existing callers keep working.

    Raises:
        ValueError: If a matched ``*.jpg`` file name stem is not an integer.
    """
    with open(f"{data_dir}/annotations/region_descriptions.json", "r") as fp:
        captions = json.load(fp)

    iid2captions = defaultdict(list)
    for cap in tqdm(captions):
        for region in cap["regions"]:
            iid2captions[region["image_id"]].append(region)

    paths = glob(f"{data_dir}/images/VG_100K/*.jpg") + glob(
        f"{data_dir}/images/VG_100K_2/*.jpg"
    )
    # glob joins matches with the platform separator; normalise to "/" so the
    # file-name extraction works everywhere (this is a no-op on POSIX).
    paths = [p.replace(os.sep, "/") for p in paths]
    random.shuffle(paths)
    caption_paths = [
        path
        for path in paths
        if int(os.path.splitext(os.path.basename(path))[0]) in iid2captions
    ]

    if len(paths) == len(caption_paths):
        print("all images have caption annotations")
    else:
        print("not all images have caption annotations")
    print(
        len(paths), len(caption_paths), len(iid2captions),
    )

    new_data = []
    for path in tqdm(caption_paths):
        new_data.extend(to_dict_vg(path, iid2captions))

    # Create the target directory like create_sbu_json does, so writing does
    # not fail on a fresh output location; os.makedirs('') would raise.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    out_path = os.path.join(output_dir, 'vg.json')
    with open(out_path, 'w') as file:
        json.dump(new_data, file)
|
|
|
|
|
|
|
|
|