39 lines
811 B
Python
39 lines
811 B
Python
import os
|
|
import json
|
|
import torch
|
|
import clip
|
|
from PIL import Image
|
|
import sng_parser
|
|
from tqdm import tqdm
|
|
import codecs
|
|
import numpy as np
|
|
|
|
|
|
|
|
# Load the two JSON layers this script combines. Further down, entries of
# data1 (layer1) are read for their 'id' and 'partition' keys, and entries of
# data_2 (layer2+) for their 'id' and 'images' keys.
#
# Each file is opened in a `with` block so the handle is closed as soon as
# parsing finishes, instead of being left for the garbage collector.
# The encoding is pinned to UTF-8 so decoding does not depend on the locale.
json_path = '/data/mshukor/data/recipe1m/recipe1m_13m/layer1.json'
with open(json_path, 'r', encoding='utf-8') as f:
    data1 = json.load(f)

json_path = '/data/mshukor/data/recipe1m/recipe1m_13m/layer2+.json'
with open(json_path, 'r', encoding='utf-8') as f:
    data_2 = json.load(f)
|
|
|
|
# Gather, in file order, the id of every layer2+ entry whose 'images'
# collection is non-empty.
ids_with_images = []

for entry in tqdm(data_2):
    entry_id = entry['id']
    # Entries without any image are skipped.
    if len(entry['images']) == 0:
        continue
    ids_with_images.append(entry_id)
|
|
|
|
|
|
# Group the ids that have at least one image by their dataset split.
# Each layer1 entry provides its 'id' and its 'partition'; the partition value
# is used directly as a key of new_ids, so any value other than
# 'train' / 'test' / 'val' raises KeyError (unchanged from the original).
new_ids = {'train': [], 'test': [], 'val': []}

# Build the lookup once, outside the loop. Testing membership against the
# list scans it on every iteration, which makes the whole pass quadratic;
# a set gives constant-time lookups and the same results.
# NOTE(review): assumes ids are hashable (JSON strings or numbers) - confirm.
ids_with_images_lookup = set(ids_with_images)

for d in tqdm(data1):
    id_ = d['id']
    split = d['partition']
    if id_ in ids_with_images_lookup:
        new_ids[split].append(id_)
|
|
|
|
|
|
# Write the per-split id lists to disk as a single JSON object.
output_path = '/data/mshukor/data/recipe1m/recipe1m_13m/original_ids.json'

with open(output_path, mode='w') as out_file:
    json.dump(obj=new_ids, fp=out_file)