# tjy/demo/core/face_detection.py

from ultralytics import YOLO
import cv2
import torch


class FaceDetector:
    """Detect faces with a YOLO model and highlight the largest one.

    Attributes set by ``__init__``:
        model: The loaded ``YOLO`` model; it is called once per image.
        class_names_dict: Mapping from class id to class name, read from
            ``self.model.model.names``. A detection counts as a face when
            its class name is exactly ``'face'``.
    """

    def __init__(self, model_path='../weights/yolov10s_face.pt'):
        """Load the YOLO weights.

        Args:
            model_path: Path to the weights file. The default is a
                relative path.

        Raises:
            SystemExit: If loading raises ``FileNotFoundError``. An error
                message is printed first and the exit status is 1.
        """
        try:
            self.model = YOLO(model_path)
        except FileNotFoundError as err:
            print("ERROR: Could not load the YOLO model")
            # Raise SystemExit directly instead of calling the site-provided
            # exit() helper, and use a non-zero status so the failure is
            # visible to whatever launched the process.
            raise SystemExit(1) from err
        self.class_names_dict = self.model.model.names

    def find_faces(self, img):
        """Detect objects in ``img``, outline the largest face, mask the rest.

        Args:
            img: Image to analyse. It must support ``.copy()`` and 2-D slice
                assignment (presumably an OpenCV/NumPy image -- confirm
                against callers). It is MODIFIED IN PLACE: the box of every
                non-face detection is filled with the value 125.

        Returns:
            A tuple ``(annotated, masked, bboxs)`` where

            * ``annotated`` is a copy of the input taken before masking, with
              the largest face outlined by ``draw_face`` (untouched copy when
              no face was found);
            * ``masked`` is the same object as ``img`` after masking;
            * ``bboxs`` is ``[(x1, y1, x2, y2, confidence)]`` for the largest
              face, or ``[]`` when no face was detected. ``confidence`` is
              passed through exactly as the model produced it.
        """
        annotated = img.copy()

        results = self.model(img, verbose=False)[0]

        faces = []
        others = []
        # Each detection row unpacks to x1, y1, x2, y2, confidence, class_id.
        for x1, y1, x2, y2, confidence, class_id in results.boxes.data:
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            if self.class_names_dict[int(class_id)] == 'face':
                faces.append((x1, y1, x2, y2, confidence))
            else:
                others.append((x1, y1, x2, y2))

        bboxs = []
        if faces:
            # Largest face by box area; on ties the first one wins.
            largest_face = max(
                faces, key=lambda box: (box[2] - box[0]) * (box[3] - box[1])
            )
            bboxs.append(largest_face)
            annotated = self.draw_face(annotated, largest_face)

        # Overwrite every non-face detection with a flat value.
        for x1, y1, x2, y2 in others:
            # Clamp at 0: a negative bound would be read as a Python negative
            # index and wrap around to the opposite edge of the image.
            # Bounds past the far edge are already clipped by slicing.
            img[max(y1, 0):max(y2, 0), max(x1, 0):max(x2, 0)] = 125

        return annotated, img, bboxs

    def draw_face(self, img, face, l=30, t=2, rt=1):
        """Draw a thin rectangle with emphasised corners around ``face``.

        Args:
            img: Image to draw on; the drawing happens in place.
            face: 5-tuple ``(x1, y1, x2, y2, confidence)``. ``confidence``
                is not used for drawing.
            l: Length in pixels of each corner stroke.
            t: Thickness of the corner strokes.
            rt: Thickness of the rectangle outline.

        Returns:
            The same ``img`` object that was passed in.
        """
        x1, y1, x2, y2, _ = face
        box_color = (0, 255, 255)
        corner_color = (255, 0, 255)

        cv2.rectangle(img, (x1, y1), (x2, y2), box_color, rt)

        # Each corner gets one horizontal and one vertical stroke pointing
        # into the box: +l from the top/left edges, -l from bottom/right.
        for cy, dy in ((y1, l), (y2, -l)):
            for cx, dx in ((x1, l), (x2, -l)):
                cv2.line(img, (cx, cy), (cx + dx, cy), corner_color, t)
                cv2.line(img, (cx, cy), (cx, cy + dy), corner_color, t)

        return img


# Usage example: show the webcam feed with the largest face outlined.
# Press 'q' in the window to quit.
if __name__ == "__main__":
    print(torch.cuda.is_available())
    detector = FaceDetector()
    cap = cv2.VideoCapture(0)  # use the default camera

    try:
        while True:
            success, frame = cap.read()
            if not success:
                # No frame was delivered; passing it on would crash inside
                # find_faces, so stop cleanly instead.
                print("ERROR: Could not read a frame from the camera")
                break

            annotated, _, _ = detector.find_faces(frame)

            cv2.imshow("Image", annotated)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even on an error.
        cap.release()
        cv2.destroyAllWindows()