# Graduation_Project/QN/RecipeRetrieval/preprocess/create_ids.py
#
# 39 lines
# 811 B
# Python
# Raw Permalink Normal View History
#
# 2024-06-26 12:21:29 +08:00
import os
import json
import torch
import clip
from PIL import Image
import sng_parser
from tqdm import tqdm
import codecs
import numpy as np
# Build, per dataset split, the list of recipe ids that have at least one image.
#
# Inputs (hard-coded paths below):
#   layer1.json  - list of records; this script reads each record's 'id' and
#                  'partition' keys.
#   layer2+.json - list of records; this script reads each record's 'id' and
#                  'images' keys.
# Output:
#   original_ids.json - {'train': [...], 'test': [...], 'val': [...]}, with ids
#                       kept in layer1.json order.
# NOTE(review): presumably these are the Recipe1M layer files and 'partition'
# is always one of train/test/val; any other value raises KeyError below.

json_path = '/data/mshukor/data/recipe1m/recipe1m_13m/layer1.json'
# Context managers make sure the (large) input files are closed promptly.
with open(json_path, 'r', encoding='utf-8') as f:
    data1 = json.load(f)

json_path = '/data/mshukor/data/recipe1m/recipe1m_13m/layer2+.json'
with open(json_path, 'r', encoding='utf-8') as f:
    data_2 = json.load(f)

# Ids of every layer2+ record whose 'images' list is non-empty.
ids_with_images = [d['id'] for d in tqdm(data_2) if len(d['images']) > 0]

# Membership is tested once per layer1 record; a set makes each test O(1)
# instead of a linear scan over the list (which made the loop quadratic).
# Assumes ids are hashable (e.g. strings) - TODO confirm against the data.
ids_with_images_set = set(ids_with_images)

new_ids = {'train': [], 'test': [], 'val': []}
for d in tqdm(data1):
    id_ = d['id']
    split = d['partition']
    if id_ in ids_with_images_set:
        new_ids[split].append(id_)

output_path = '/data/mshukor/data/recipe1m/recipe1m_13m/original_ids.json'
with open(output_path, 'w') as f:
    json.dump(new_ids, f)