200 lines
7.2 KiB
Python
200 lines
7.2 KiB
Python
|
# Dataset utils and dataloaders
|
|||
|
|
|||
|
import glob
|
|||
|
import logging
|
|||
|
import math
|
|||
|
import os
|
|||
|
import random
|
|||
|
import shutil
|
|||
|
import time
|
|||
|
from itertools import repeat
|
|||
|
from multiprocessing.pool import ThreadPool
|
|||
|
from pathlib import Path
|
|||
|
from threading import Thread
|
|||
|
import re
|
|||
|
import cv2
|
|||
|
import numpy as np
|
|||
|
import torch
|
|||
|
import torch.nn.functional as F
|
|||
|
from PIL import Image, ExifTags
|
|||
|
from torch.utils.data import Dataset
|
|||
|
from tqdm import tqdm
|
|||
|
|
|||
|
# Parameters
help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
img_formats = [  # acceptable image suffixes (lower-case, without the dot)
    'bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng',
]
vid_formats = [  # acceptable video suffixes (lower-case, without the dot)
    'mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv',
]
logger = logging.getLogger(__name__)

# Get orientation exif tag: scan the EXIF tag table and stop at the entry named
# 'Orientation', leaving the module-level name `orientation` bound to its key
# (or to the last key scanned if no entry matches).
for orientation in ExifTags.TAGS:
    if ExifTags.TAGS[orientation] == 'Orientation':
        break
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
class LoadImages:  # for inference
    """Iterator over image and video files that yields letterboxed frames.

    ``path`` may be a glob pattern (contains ``*``), a directory, or a single
    file. Files are split into images and videos by their lower-cased suffix
    (``img_formats`` / ``vid_formats``); images are iterated first, then videos.

    Each ``next()`` returns one frame resized/padded by ``letterbox`` to
    ``img_size`` with the given ``stride``. Frames are returned as read by
    OpenCV (BGR order; no RGB conversion is applied).

    Note: in the video branch ``count`` is intentionally not advanced, and a
    failed read rewinds the capture to frame 0, so iteration keeps looping over
    the first video once it is reached.

    Raises:
        FileNotFoundError: ``path`` is not a glob, directory or existing file.
        AssertionError: no supported file was found, an image could not be
            decoded, or a video frame could not be read even after rewinding.
    """

    def __init__(self, path, stride=32, img_size=640):
        p = os.path.abspath(str(Path(path)))  # os-agnostic absolute path
        if '*' in p:
            files = sorted(glob.glob(p, recursive=True))  # glob pattern
        elif os.path.isdir(p):
            files = sorted(glob.glob(os.path.join(p, '*.*')))  # directory
        elif os.path.isfile(p):
            files = [p]  # single file
        else:
            # FileNotFoundError is an Exception subclass, so callers that
            # caught the former bare Exception keep working.
            raise FileNotFoundError(f'ERROR: {p} does not exist')

        images = [x for x in files if x.split('.')[-1].lower() in img_formats]
        videos = [x for x in files if x.split('.')[-1].lower() in vid_formats]
        ni, nv = len(images), len(videos)

        self.img_size = img_size
        self.files = images + videos
        self.nf = ni + nv  # number of files
        self.video_flag = [False] * ni + [True] * nv
        self.mode = 'image'
        self.stride = stride
        if videos:
            self.new_video(videos[0])  # open the first video
        else:
            self.cap = None
        if self.nf <= 0:
            # Raised explicitly (not via `assert`) so the check survives
            # `python -O`; the AssertionError type is kept for compatibility.
            raise AssertionError(f'No images or videos found in {p}. '
                                 f'Supported formats are:\nimages: {img_formats}\nvideos: {vid_formats}')

    def __iter__(self):
        self.count = 0
        return self

    def __next__(self):
        if self.count == self.nf:
            raise StopIteration
        path = self.files[self.count]

        if self.video_flag[self.count]:
            # Read video
            self.mode = 'video'
            ret_val, img0 = self.cap.read()  # ret_val: True when a frame was read
            if not ret_val:
                # End of video (or read error): rewind and loop from frame 0.
                self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
                self.frame = 0
                ret_val, img0 = self.cap.read()
            if not ret_val or img0 is None:
                # Without this guard a None frame reaches letterbox() and
                # fails there with an unrelated AttributeError.
                raise AssertionError('Video Frame Not Readable ' + path)
            self.frame += 1
        else:
            # Read image
            self.count += 1
            img0 = cv2.imread(path)  # BGR
            if img0 is None:
                raise AssertionError('Image Not Found ' + path)

        # Resize/pad to the inference shape; only the image is kept, the
        # ratio and padding returned by letterbox() are discarded.
        img0 = letterbox(img0, new_shape=self.img_size, stride=self.stride)[0]
        return img0

    def new_video(self, path):
        """Open ``path`` as the current capture and reset the frame counter."""
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def __len__(self):
        return self.nf  # number of files
|
|||
|
|
|||
|
|
|||
|
class LoadStreams:  # multiple IP or RTSP cameras
    """Read one or more video streams in background threads.

    ``sources`` is either the path of a text file listing one source per line
    (blank lines are skipped) or a single source string. A source made only of
    decimal digits is passed to OpenCV as an integer device index; anything
    else is passed through as a string.

    For every source a daemon thread keeps ``self.imgs[i]`` updated with a
    recent frame. Iterating the object never stops: each ``next()`` returns a
    shallow copy of the current list of frames (raw, not letterboxed).

    Raises:
        AssertionError: a source cannot be opened or its first frame cannot
            be read.
    """

    def __init__(self, sources='streams.txt', img_size=640):
        self.mode = 'stream'
        self.img_size = img_size

        if os.path.isfile(sources):
            with open(sources, 'r') as f:
                sources = [x.strip() for x in f.read().strip().splitlines() if x.strip()]
        else:
            sources = [sources]

        n = len(sources)
        self.imgs = [None] * n
        self.sources = [clean_str(x) for x in sources]  # clean source names for later
        for i, s in enumerate(sources):
            # Start the thread to read frames from the video stream
            print(f'{i + 1}/{n}: {s}... ', end='')
            # NOTE(security): this used to be eval(s); int() gives the same
            # device index for digit strings without executing source text.
            cap = cv2.VideoCapture(int(s) if s.isdecimal() else s)
            if not cap.isOpened():
                # Explicit raise (not `assert`) so the check survives `python -O`.
                raise AssertionError(f'Failed to open {s}')
            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS) % 100
            ok, first_frame = cap.read()  # guarantee first frame
            if not ok or first_frame is None:
                # A None frame would otherwise crash the shape check below.
                raise AssertionError(f'Failed to read first frame from {s}')
            self.imgs[i] = first_frame
            thread = Thread(target=self.update, args=(i, cap), daemon=True)
            print(f' success ({w}x{h} at {fps:.2f} FPS).')
            thread.start()
        print('')  # newline

        # check for common shapes
        s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0)  # inference shapes
        self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
        if not self.rect:
            print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')

    def update(self, index, cap):
        """Keep ``self.imgs[index]`` refreshed from ``cap``; runs in a daemon thread.

        Every frame is grabbed, but only every 4th one is decoded. If decoding
        fails, the previously stored frame is kept instead of being replaced
        with ``None``.
        """
        n = 0
        while cap.isOpened():
            n += 1
            cap.grab()
            if n == 4:  # read every 4th frame
                ok, frame = cap.retrieve()
                if ok:
                    self.imgs[index] = frame
                n = 0
            time.sleep(0.01)  # wait time

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        # Shallow copy: the list is detached from the reader threads, the
        # frame objects themselves are shared.
        img0 = self.imgs.copy()
        return img0

    def __len__(self):
        return 0  # 1E12 frames = 32 streams at 30 FPS for 30 years
|
|||
|
|
|||
|
|
|||
|
def clean_str(s):
    """Return ``s`` with each special character replaced by an underscore.

    The special characters are exactly those listed in the character class
    below; every other character is left unchanged.
    """
    special_chars = "[|@#!¡·$€%&()=?¿^*;:,¨´><+]"
    return re.sub(special_chars, "_", s)
|
|||
|
|
|||
|
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize ``img`` to fit ``new_shape`` and pad the remainder with ``color``.

    See https://github.com/ultralytics/yolov3/issues/232

    Args:
        img: image array; its first two dimensions are read as (height, width).
        new_shape: target (height, width), or a single int used for both.
        color: constant border value handed to ``cv2.copyMakeBorder``.
        auto: if true, keep only the padding remainder modulo ``stride``
            (minimum rectangle) instead of padding to the full ``new_shape``.
        scaleFill: only consulted when ``auto`` is false; stretch the image to
            exactly ``new_shape`` with no padding.
        scaleup: if false, the image is only ever scaled down, never up.
        stride: modulus applied to the padding when ``auto`` is true.

    Returns:
        ``(img, ratio, (dw, dh))`` where ``ratio`` is the (width, height) scale
        factor pair and ``dw`` / ``dh`` are the padding amounts per side.
    """
    src_h, src_w = img.shape[:2]  # current shape is [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    # Scale ratio (new / old): the smaller factor keeps the whole image inside the target
    scale = min(dst_h / src_h, dst_w / src_w)
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        scale = min(scale, 1.0)

    # Compute padding
    ratio = (scale, scale)  # width, height ratios
    resized_wh = (int(round(src_w * scale)), int(round(src_h * scale)))
    pad_w = dst_w - resized_wh[0]
    pad_h = dst_h - resized_wh[1]
    if auto:  # minimum rectangle
        pad_w, pad_h = np.mod(pad_w, stride), np.mod(pad_h, stride)
    elif scaleFill:  # stretch
        pad_w, pad_h = 0.0, 0.0
        resized_wh = (dst_w, dst_h)
        ratio = (dst_w / src_w, dst_h / src_h)  # width, height ratios

    # Divide the padding between the two opposite sides
    pad_w = pad_w / 2
    pad_h = pad_h / 2

    if (src_w, src_h) != resized_wh:  # resize only when the size actually changes
        img = cv2.resize(img, resized_wh, interpolation=cv2.INTER_LINEAR)

    # The -0.1 / +0.1 offsets send an odd padding pixel to the bottom / right side
    top = int(round(pad_h - 0.1))
    bottom = int(round(pad_h + 0.1))
    left = int(round(pad_w - 0.1))
    right = int(round(pad_w + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (pad_w, pad_h)
|