import datetime
import os
import time

import ffmpeg
import torch
import cv2
import numpy as np
from multiprocessing import Process, Manager
from threading import Thread
from read_data import LoadImages, LoadStreams
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
import torchvision

from PIL import Image, ImageDraw, ImageFont


class YOLO_Segment():
    time_reference = datetime.datetime.now()
    counter_frame = 0
    processed_fps = 0

    def __init__(self, video_path=None):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.model = torch.load('weight/segment/yolov5s-seg.pt', map_location=self.device)['model'].float().fuse()
        self.classes = self.model.names

        self.frame = [None]

        if video_path is not None:
            self.video_name = video_path
        else:
            self.video_name = 'vid2.mp4'  # A default video file

        self.dataset = LoadImages(self.video_name)

        self.names = self.model.names

    def use_webcam(self, source):
        # Switch the input pipeline from a video file to a live stream
        # self.dataset.release() # Release any existing video capture
        # self.cap = cv2.VideoCapture(0) # Open default webcam
        # print('use_webcam')
        self.imgsz = 640
        cudnn.benchmark = True
        self.dataset = LoadStreams(source, img_size=self.imgsz)

    def class_to_label(self, x):
        return self.classes[int(x)]

    def get_frame(self):

        colors = Colors()

        for im0s in self.dataset:
            # print(self.dataset.mode)
            # print(self.dataset)
            if self.dataset.mode == 'stream':
                image = im0s[0].copy()
            else:
                image = im0s.copy()
            img = image[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW

            img0 = img.copy()

            img = torch.tensor(img0)

            img = img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)
            img = img.to(self.device)
            self.model.to(self.device)
            pred, proto = self.model(img)[:2]

            pred = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45, max_det=1000, nm=32)

            txt = ""  # detection summary; stays empty if nothing is detected
            for i, det in enumerate(pred):  # per image
                annotator = Annotator(image, line_width=3, example=str(self.names))

                if len(det):
                    masks = process_mask(proto[i], det[:, 6:], det[:, :4], img.shape[2:], upsample=True)  # HWC
                    det[:, :4] = scale_boxes(img.shape[2:], det[:, :4], image.shape).round()  # rescale boxes to original frame size
                    segments = reversed(masks2segments(masks))
                    segments = [scale_segments(img.shape[2:], x, image.shape, normalize=True) for x in segments]

                    # Summarize results per class
                    for c in det[:, 5].unique():
                        n = (det[:, 5] == c).sum()  # detections per class
                        txt += f"{n} {self.classes[int(c)]}{'s' * (n > 1)}, "  # add to string

                    annotator.masks(masks,
                                    colors=[colors(x, True) for x in det[:, 5]],
                                    im_gpu=img[i])

                im0 = annotator.result()

            # Encode the annotated frame as JPEG and return it with the detection summary
            ret, jpeg = cv2.imencode(".jpg", im0)

            return jpeg.tobytes(), txt


class Colors:
    # Ultralytics color palette https://ultralytics.com/
    def __init__(self):
        # hex = matplotlib.colors.TABLEAU_COLORS.values()
        hexs = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
                '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
        self.palette = [self.hex2rgb(f'#{c}') for c in hexs]
        self.n = len(self.palette)

    def __call__(self, i, bgr=False):
        c = self.palette[int(i) % self.n]
        return (c[2], c[1], c[0]) if bgr else c

    @staticmethod
    def hex2rgb(h):  # rgb order (PIL)
        return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))

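# Usage note (illustrative, not part of the original pipeline): Colors maps an
# arbitrary class index onto the fixed 20-colour palette above, e.g.
#   palette = Colors()
#   palette(0)        # -> (255, 56, 56)  RGB for hex 'FF3838'
#   palette(0, True)  # -> (56, 56, 255)  same colour in BGR order for OpenCV drawing
#   palette(23)       # -> wraps around via modulo, same as palette(3)
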
class Annotator:
    # YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations
    def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'):
        assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images.'
        non_ascii = not is_ascii(example)  # non-latin labels, i.e. asian, arabic, cyrillic
        self.pil = pil or non_ascii
        if self.pil:  # use PIL
            self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
            self.draw = ImageDraw.Draw(self.im)
            try:
                # text() expects a PIL font object, not a filename string
                self.font = ImageFont.truetype('Arial.Unicode.ttf',
                                               font_size or max(round(sum(self.im.size) / 2 * 0.035), 12))
            except OSError:
                self.font = ImageFont.load_default()  # fall back when the TTF file is unavailable
        else:  # use cv2
            self.im = im
            self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2)  # line width

    def masks(self, masks, colors, im_gpu, alpha=0.5):
        """Plot masks at once.
        Args:
            masks (tensor): predicted masks on cuda, shape: [n, h, w]
            colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n]
            im_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1]
            alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque
        """
        if self.pil:
            # convert to numpy first
            self.im = np.asarray(self.im).copy()
        if im_gpu is None:
            # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...)
            if len(masks) == 0:
                return
            if isinstance(masks, torch.Tensor):
                masks = torch.as_tensor(masks, dtype=torch.uint8)
                masks = masks.permute(1, 2, 0).contiguous()
                masks = masks.cpu().numpy()
            # masks = np.ascontiguousarray(masks.transpose(1, 2, 0))
            masks = scale_image(masks.shape[:2], masks, self.im.shape)
            masks = np.asarray(masks, dtype=np.float32)
            colors = np.asarray(colors, dtype=np.float32)  # shape(n,3)
            s = masks.sum(2, keepdims=True).clip(0, 1)  # add all masks together
            masks = (masks @ colors).clip(0, 255)  # (h,w,n) @ (n,3) = (h,w,3)
            self.im[:] = masks * alpha + self.im * (1 - s * alpha)
        else:
            if len(masks) == 0:
                self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255
            colors = torch.tensor(colors, device=im_gpu.device, dtype=torch.float32) / 255.0
            colors = colors[:, None, None]  # shape(n,1,1,3)
            masks = masks.unsqueeze(3)  # shape(n,h,w,1)
            masks_color = masks * (colors * alpha)  # shape(n,h,w,3)

            inv_alph_masks = (1 - masks * alpha).cumprod(0)  # shape(n,h,w,1)
            mcs = (masks_color * inv_alph_masks).sum(0) * 2  # mask color summand shape(n,h,w,3)

            im_gpu = im_gpu.flip(dims=[0])  # flip channel
            im_gpu = im_gpu.permute(1, 2, 0).contiguous()  # shape(h,w,3)
            im_gpu = im_gpu * inv_alph_masks[-1] + mcs
            im_mask = (im_gpu * 255).byte().cpu().numpy()
            # print(type(im_gpu), type(im_mask), type(self.im.shape))
            self.im[:] = scale_image(im_gpu.shape, im_mask, self.im.shape)
        if self.pil:
            # convert im back to PIL and update draw
            self.fromarray(self.im)

    def rectangle(self, xy, fill=None, outline=None, width=1):
        # Add rectangle to image (PIL-only)
        self.draw.rectangle(xy, fill, outline, width)

    def text(self, xy, text, txt_color=(255, 255, 255), anchor='top'):
        # Add text to image (PIL-only)
        if anchor == 'bottom':  # start y from font bottom
            w, h = self.font.getsize(text)  # text width, height
            xy[1] += 1 - h
        self.draw.text(xy, text, fill=txt_color, font=self.font)

    def fromarray(self, im):
        # Update self.im from a numpy array
        self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
        self.draw = ImageDraw.Draw(self.im)

    def result(self):
        # Return annotated image as array
        return np.asarray(self.im)


def time_synchronized():
    # pytorch-accurate time
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def is_ascii(s=''):
    # Is string composed of all ASCII (no UTF) characters? (note str().isascii() introduced in python 3.7)
    s = str(s)  # convert list, tuple, None, etc. to str
    return len(s.encode().decode('ascii', 'ignore')) == len(s)


def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
    # Rescale boxes (xyxy) from img1_shape to img0_shape
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    boxes[..., [0, 2]] -= pad[0]  # x padding
    boxes[..., [1, 3]] -= pad[1]  # y padding
    boxes[..., :4] /= gain
    clip_boxes(boxes, img0_shape)
    return boxes


def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False):
    # Rescale coords (xyxy) from img1_shape to img0_shape
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    segments[:, 0] -= pad[0]  # x padding
    segments[:, 1] -= pad[1]  # y padding
    segments /= gain
    clip_segments(segments, img0_shape)
    if normalize:
        segments[:, 0] /= img0_shape[1]  # width
        segments[:, 1] /= img0_shape[0]  # height
    return segments


def clip_boxes(boxes, shape):
    # Clip boxes (xyxy) to image shape (height, width)
    if isinstance(boxes, torch.Tensor):  # faster individually
        boxes[..., 0].clamp_(0, shape[1])  # x1
        boxes[..., 1].clamp_(0, shape[0])  # y1
        boxes[..., 2].clamp_(0, shape[1])  # x2
        boxes[..., 3].clamp_(0, shape[0])  # y2
    else:  # np.array (faster grouped)
        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2
        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2


def clip_segments(segments, shape):
    # Clip segments (xy1,xy2,...) to image shape (height, width)
    if isinstance(segments, torch.Tensor):  # faster individually
        segments[:, 0].clamp_(0, shape[1])  # x
        segments[:, 1].clamp_(0, shape[0])  # y
    else:  # np.array (faster grouped)
        segments[:, 0] = segments[:, 0].clip(0, shape[1])  # x
        segments[:, 1] = segments[:, 1].clip(0, shape[0])  # y


def masks2segments(masks, strategy='largest'):
    # Convert masks(n,160,160) into segments(n,xy)
    segments = []
    for x in masks.int().cpu().numpy().astype('uint8'):
        c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
        if c:
            if strategy == 'concat':  # concatenate all segments
                c = np.concatenate([x.reshape(-1, 2) for x in c])
            elif strategy == 'largest':  # select largest segment
                c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
        else:
            c = np.zeros((0, 2))  # no segments found
        segments.append(c.astype('float32'))
    return segments


def process_mask(protos, masks_in, bboxes, shape, upsample=False):
    """
    Crop before upsample.
    proto_out: [mask_dim, mask_h, mask_w]
    out_masks: [n, mask_dim], n is number of masks after nms
    bboxes: [n, 4], n is number of masks after nms
    shape: input_image_size, (h, w)

    return: h, w, n
    """
    c, mh, mw = protos.shape  # CHW
    ih, iw = shape
    # print(masks_in.shape, protos.shape)
    masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)  # CHW

    downsampled_bboxes = bboxes.clone()
    downsampled_bboxes[:, 0] *= mw / iw
    downsampled_bboxes[:, 2] *= mw / iw
    downsampled_bboxes[:, 3] *= mh / ih
    downsampled_bboxes[:, 1] *= mh / ih

    masks = crop_mask(masks, downsampled_bboxes)  # CHW
    if upsample:
        masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
    return masks.gt_(0.5)


def crop_mask(masks, boxes):
    """
    "Crop" predicted masks by zeroing out everything not in the predicted bbox.
    Vectorized by Chong (thanks Chong).

    Args:
        - masks should be a size [h, w, n] tensor of masks
        - boxes should be a size [n, 4] tensor of bbox coords in relative point form
    """
    n, h, w = masks.shape
    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # x1 shape(1,1,n)
    r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # rows shape(1,w,1)
    c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # cols shape(h,1,1)

    return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))


def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):
    """
    im1_shape: model input shape, [h, w]
    im0_shape: origin pic shape, [h, w, 3]
    masks: [h, w, num]
    """
    # Rescale coordinates (xyxy) from im1_shape to im0_shape
    if ratio_pad is None:  # calculate from im0_shape
        gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # gain = old / new
        pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2  # wh padding
    else:
        pad = ratio_pad[1]
    top, left = int(pad[1]), int(pad[0])  # y, x
    bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0])

    if len(masks.shape) < 2:
        raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
    masks = masks[top:bottom, left:right]
    # masks = masks.permute(2, 0, 1).contiguous()
    # masks = F.interpolate(masks[None], im0_shape[:2], mode='bilinear', align_corners=False)[0]
    # masks = masks.permute(1, 2, 0).contiguous()

    masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]))

    if len(masks.shape) == 2:
        masks = masks[:, :, None]
    return masks


def xywh2xyxy(x):
    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y

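# Worked example (illustrative): a centre-format box [x, y, w, h] = [10, 10, 4, 6]
# becomes [x1, y1, x2, y2] = [8, 7, 12, 13], i.e. corners at the centre +/- half the size.

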
def non_max_suppression(
        prediction,
        conf_thres=0.25,
        iou_thres=0.45,
        classes=None,
        agnostic=False,
        multi_label=False,
        labels=(),
        max_det=300,
        nm=0,  # number of masks
):
    """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections

    Returns:
        list of detections, one (n,6) tensor per image [xyxy, conf, cls]
    """
    if isinstance(prediction, (list, tuple)):  # YOLOv5 model in validation mode, output = (inference_out, loss_out)
        prediction = prediction[0]  # select only inference output

    device = prediction.device
    mps = 'mps' in device.type  # Apple MPS
    if mps:  # MPS not fully supported yet, convert tensors to CPU before NMS
        prediction = prediction.cpu()
    bs = prediction.shape[0]  # batch size
    nc = prediction.shape[2] - nm - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    # Settings
    # min_wh = 2  # (pixels) minimum box width and height
    max_wh = 7680  # (pixels) maximum box width and height
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 0.5 + 0.05 * bs  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    mi = 5 + nc  # mask start index
    output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + nm + 5), device=x.device)
            v[:, :4] = lb[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box/Mask
        box = xywh2xyxy(x[:, :4])  # (center_x, center_y, width, height) to (x1, y1, x2, y2)
        mask = x[:, mi:]  # zero columns if no masks

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:mi] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, 5 + j, None], j[:, None].float(), mask[i]), 1)
        else:  # best class only
            conf, j = x[:, 5:mi].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence
        else:
            x = x[x[:, 4].argsort(descending=True)]  # sort by confidence

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if mps:
            output[xi] = output[xi].to(device)

    return output

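# Note on the output layout (as consumed by get_frame above): with nm=32 mask
# coefficients, each row of a returned detection tensor is
#   [x1, y1, x2, y2, conf, cls, m1 ... m32]
# so det[:, :4] are boxes, det[:, 4] confidences, det[:, 5] class ids and
# det[:, 6:] the per-instance mask coefficients passed to process_mask().

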
def box_iou(box1, box2, eps=1e-7):
    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Return intersection-over-union (Jaccard index) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """
    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    (a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2)
    inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)

    # IoU = inter / (area1 + area2 - inter)
    return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps)
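

# --- Minimal usage sketch (not part of the original module) ---
# Assumes the weight file 'weight/segment/yolov5s-seg.pt' and a readable video
# exist, and that read_data.LoadImages yields frames as used above.
if __name__ == '__main__':
    segmenter = YOLO_Segment(video_path='vid2.mp4')  # the module's default sample video
    jpeg_bytes, summary = segmenter.get_frame()      # first annotated frame + e.g. "2 persons, 1 car, "
    with open('annotated_frame.jpg', 'wb') as f:
        f.write(jpeg_bytes)
    print(summary)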